aboutsummaryrefslogtreecommitdiff
path: root/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm')
-rw-r--r--llvm/include/llvm/CodeGen/BasicTTIImpl.h11
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h47
-rw-r--r--llvm/include/llvm/CodeGen/RuntimeLibcallUtil.h4
-rw-r--r--llvm/include/llvm/CodeGen/TargetLowering.h29
-rw-r--r--llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h6
-rw-r--r--llvm/include/llvm/IR/AbstractCallSite.h10
-rw-r--r--llvm/include/llvm/IR/IRBuilder.h2
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp13
-rw-r--r--llvm/lib/CodeGen/SwitchLoweringUtils.cpp22
-rw-r--r--llvm/lib/CodeGen/TargetLoweringBase.cpp21
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFContext.cpp19
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp22
-rw-r--r--llvm/lib/Target/AArch64/AArch64.h2
-rw-r--r--llvm/lib/Target/AArch64/AArch64.td2
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.cpp3
-rw-r--r--llvm/lib/Target/AArch64/AArch64Processors.td10
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedNeoverseV3.td2777
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedNeoverseV3AE.td2705
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetMachine.cpp6
-rw-r--r--llvm/lib/Target/AArch64/MachineSMEABIPass.cpp150
-rw-r--r--llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp238
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.cpp25
-rw-r--r--llvm/lib/Target/DirectX/DXILOpLowering.cpp2
-rw-r--r--llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp29
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.cpp8
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrFuture.td20
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp57
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp2
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp19
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineInternal.h2
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp7
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.h30
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp22
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp133
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir7
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/legalize-freeze.mir15
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/legalize-insert-vector-elt.mir14
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir11
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir4
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir32
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir7
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir4
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/legalize-undef.mir14
-rw-r--r--llvm/test/CodeGen/AArch64/dup.ll90
-rw-r--r--llvm/test/CodeGen/AArch64/sme-agnostic-za.ll7
-rw-r--r--llvm/test/CodeGen/AArch64/sme-za-control-flow.ll107
-rw-r--r--llvm/test/CodeGen/AArch64/sme-za-exceptions.ll18
-rw-r--r--llvm/test/CodeGen/AArch64/sme-za-function-with-many-blocks.ll296
-rw-r--r--llvm/test/CodeGen/AArch64/sme-za-lazy-save-buffer.ll110
-rw-r--r--llvm/test/CodeGen/AArch64/sme-zt0-state.ll18
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-trunc.mir6
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-abs.mir50
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir238
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir73
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir25
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir64
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir103
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir82
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir73
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp.mir49
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir80
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir193
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir70
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir85
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir84
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir84
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir82
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir83
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpext.mir20
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir12
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir67
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir67
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir80
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir48
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir57
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir33
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def-s1025.mir197
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir118
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir80
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir115
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir57
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll33
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll52
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll27
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir359
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir698
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir1630
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir688
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir665
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir107
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir180
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir286
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir29
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir80
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir65
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir202
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir73
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir94
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir94
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir55
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir29
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir81
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir171
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir42
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddo.mir31
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir82
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir93
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir93
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir20
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir56
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubo.mir31
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir82
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir176
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir35
-rw-r--r--llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll41
-rw-r--r--llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll118
-rw-r--r--llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll1
-rw-r--r--llvm/test/CodeGen/AMDGPU/branch-relaxation.ll2
-rw-r--r--llvm/test/CodeGen/AMDGPU/global-saddr-atomics-min-max-system.ll16
-rw-r--r--llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll27
-rw-r--r--llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll27
-rw-r--r--llvm/test/CodeGen/AMDGPU/legalize-amdgcn.dead.mir16
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll2
-rw-r--r--llvm/test/CodeGen/AMDGPU/nor-divergent-lanemask.ll3
-rw-r--r--llvm/test/CodeGen/AMDGPU/skip-if-dead.ll2
-rw-r--r--llvm/test/CodeGen/AMDGPU/valu-mask-write-hazard.mir388
-rw-r--r--llvm/test/CodeGen/Hexagon/inst_masked_store_bug1.ll94
-rw-r--r--llvm/test/CodeGen/Mips/GlobalISel/legalizer/implicit_def.mir45
-rw-r--r--llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/implicit_def.ll2
-rw-r--r--llvm/test/CodeGen/PowerPC/bittest.ll193
-rw-r--r--llvm/test/CodeGen/X86/GlobalISel/legalize-undef-vec-scaling.mir60
-rw-r--r--llvm/test/CodeGen/X86/GlobalISel/legalize-undef.mir6
-rw-r--r--llvm/test/CodeGen/X86/bfloat-calling-conv.ll6
-rw-r--r--llvm/test/CodeGen/X86/bittest-big-integer.ll7027
-rw-r--r--llvm/test/CodeGen/X86/trunc-srl-load.ll1652
-rw-r--r--llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll50
-rw-r--r--llvm/test/CodeGen/X86/widen-load-of-small-alloca.ll53
-rw-r--r--llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt3
-rw-r--r--llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt3
-rw-r--r--llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s4
-rw-r--r--llvm/test/Transforms/InstCombine/assume.ll28
-rw-r--r--llvm/test/Transforms/InstCombine/ptrtoaddr.ll72
-rw-r--r--llvm/test/Transforms/InstSimplify/ConstProp/vecreduce.ll418
-rw-r--r--llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll76
-rw-r--r--llvm/test/Transforms/LoopVectorize/reduction-inloop.ll82
-rw-r--r--llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll542
-rw-r--r--llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/switch_case.ll54
-rw-r--r--llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/switch_case.ll.expected106
-rw-r--r--llvm/test/tools/UpdateTestChecks/update_test_checks/switch_case.test3
-rw-r--r--llvm/test/tools/dxil-dis/llvm_assume.ll11
-rw-r--r--llvm/test/tools/llvm-dwarfdump/X86/type_units_split_dwp_v4.s251
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Cortex/X4-sve-instructions.s55
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-basic-instructions.s3779
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-clear-upper-regs.s892
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-forwarding.s1960
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-neon-instructions.s3731
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-sve-instructions.s10289
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-writeback.s3979
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-zero-lat-movs.s83
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-basic-instructions.s3777
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-clear-upper-regs.s872
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-forwarding.s1960
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-neon-instructions.s3729
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-sve-instructions.s10287
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-writeback.s3979
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-zero-lat-movs.s81
-rw-r--r--llvm/unittests/IR/AbstractCallSiteTest.cpp94
-rw-r--r--llvm/unittests/Transforms/Vectorize/VPlanTest.cpp10
-rw-r--r--llvm/utils/UpdateTestChecks/common.py2
-rw-r--r--llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn1
-rw-r--r--llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/TargetProcess/BUILD.gn2
-rw-r--r--llvm/utils/gn/secondary/llvm/lib/MC/BUILD.gn1
-rw-r--r--llvm/utils/gn/secondary/llvm/tools/llvm-remarkutil/BUILD.gn1
-rw-r--r--llvm/utils/gn/secondary/llvm/unittests/ADT/BUILD.gn1
-rw-r--r--llvm/utils/gn/secondary/llvm/unittests/ExecutionEngine/Orc/BUILD.gn1
-rw-r--r--llvm/utils/lit/lit/TestRunner.py2
-rw-r--r--llvm/utils/lit/tests/Inputs/shtest-env-path/lit.cfg8
-rw-r--r--llvm/utils/lit/tests/Inputs/shtest-env-path/path.txt8
-rwxr-xr-xllvm/utils/lit/tests/Inputs/shtest-env-path/test.sh4
-rw-r--r--llvm/utils/lit/tests/shtest-env-path.py13
-rw-r--r--llvm/utils/profcheck-xfail.txt1
-rwxr-xr-xllvm/utils/update_test_checks.py12
182 files changed, 69311 insertions, 7910 deletions
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 76b6c8e..e8dbc96 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -594,12 +594,13 @@ public:
// Check if suitable for a bit test
if (N <= DL.getIndexSizeInBits(0u)) {
- SmallPtrSet<const BasicBlock *, 4> Dests;
- for (auto I : SI.cases())
- Dests.insert(I.getCaseSuccessor());
+ DenseMap<const BasicBlock *, unsigned int> DestMap;
+ for (auto I : SI.cases()) {
+ const BasicBlock *BB = I.getCaseSuccessor();
+ ++DestMap[BB];
+ }
- if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal,
- DL))
+ if (TLI->isSuitableForBitTests(DestMap, MinCaseVal, MaxCaseVal, DL))
return 1;
}
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
index 6dccdc2..d8d7ccc 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
@@ -356,7 +356,7 @@ public:
// trunc(ext x) -> x
ArtifactValueFinder Finder(MRI, Builder, LI);
if (Register FoundReg =
- Finder.findValueFromDef(DstReg, 0, DstTy.getSizeInBits())) {
+ Finder.findValueFromDef(DstReg, 0, DstTy.getSizeInBits(), DstTy)) {
LLT FoundRegTy = MRI.getType(FoundReg);
if (DstTy == FoundRegTy) {
LLVM_DEBUG(dbgs() << ".. Combine G_TRUNC(G_[S,Z,ANY]EXT/G_TRUNC...): "
@@ -641,10 +641,11 @@ public:
Register SrcReg = Concat.getReg(StartSrcIdx);
if (InRegOffset == 0 && Size == SrcSize) {
CurrentBest = SrcReg;
- return findValueFromDefImpl(SrcReg, 0, Size);
+ return findValueFromDefImpl(SrcReg, 0, Size, MRI.getType(SrcReg));
}
- return findValueFromDefImpl(SrcReg, InRegOffset, Size);
+ return findValueFromDefImpl(SrcReg, InRegOffset, Size,
+ MRI.getType(SrcReg));
}
/// Given a build_vector op \p BV and a start bit and size, try to find
@@ -759,7 +760,8 @@ public:
if (EndBit <= InsertOffset || InsertedEndBit <= StartBit) {
SrcRegToUse = ContainerSrcReg;
NewStartBit = StartBit;
- return findValueFromDefImpl(SrcRegToUse, NewStartBit, Size);
+ return findValueFromDefImpl(SrcRegToUse, NewStartBit, Size,
+ MRI.getType(SrcRegToUse));
}
if (InsertOffset <= StartBit && EndBit <= InsertedEndBit) {
SrcRegToUse = InsertedReg;
@@ -767,7 +769,8 @@ public:
if (NewStartBit == 0 &&
Size == MRI.getType(SrcRegToUse).getSizeInBits())
CurrentBest = SrcRegToUse;
- return findValueFromDefImpl(SrcRegToUse, NewStartBit, Size);
+ return findValueFromDefImpl(SrcRegToUse, NewStartBit, Size,
+ MRI.getType(SrcRegToUse));
}
// The bit range spans both the inserted and container regions.
return Register();
@@ -799,7 +802,7 @@ public:
if (StartBit == 0 && SrcType.getSizeInBits() == Size)
CurrentBest = SrcReg;
- return findValueFromDefImpl(SrcReg, StartBit, Size);
+ return findValueFromDefImpl(SrcReg, StartBit, Size, SrcType);
}
/// Given a G_TRUNC op \p MI and a start bit and size, try to find
@@ -819,14 +822,14 @@ public:
if (!SrcType.isScalar())
return CurrentBest;
- return findValueFromDefImpl(SrcReg, StartBit, Size);
+ return findValueFromDefImpl(SrcReg, StartBit, Size, SrcType);
}
/// Internal implementation for findValueFromDef(). findValueFromDef()
/// initializes some data like the CurrentBest register, which this method
/// and its callees rely upon.
Register findValueFromDefImpl(Register DefReg, unsigned StartBit,
- unsigned Size) {
+ unsigned Size, LLT DstTy) {
std::optional<DefinitionAndSourceRegister> DefSrcReg =
getDefSrcRegIgnoringCopies(DefReg, MRI);
MachineInstr *Def = DefSrcReg->MI;
@@ -847,7 +850,7 @@ public:
}
Register SrcReg = Def->getOperand(Def->getNumOperands() - 1).getReg();
Register SrcOriginReg =
- findValueFromDefImpl(SrcReg, StartBit + DefStartBit, Size);
+ findValueFromDefImpl(SrcReg, StartBit + DefStartBit, Size, DstTy);
if (SrcOriginReg)
return SrcOriginReg;
// Failed to find a further value. If the StartBit and Size perfectly
@@ -868,6 +871,12 @@ public:
case TargetOpcode::G_ZEXT:
case TargetOpcode::G_ANYEXT:
return findValueFromExt(*Def, StartBit, Size);
+ case TargetOpcode::G_IMPLICIT_DEF: {
+ if (MRI.getType(DefReg) == DstTy)
+ return DefReg;
+ MIB.setInstrAndDebugLoc(*Def);
+ return MIB.buildUndef(DstTy).getReg(0);
+ }
default:
return CurrentBest;
}
@@ -882,10 +891,10 @@ public:
/// at position \p StartBit with size \p Size.
/// \returns a register with the requested size, or an empty Register if no
/// better value could be found.
- Register findValueFromDef(Register DefReg, unsigned StartBit,
- unsigned Size) {
+ Register findValueFromDef(Register DefReg, unsigned StartBit, unsigned Size,
+ LLT DstTy) {
CurrentBest = Register();
- Register FoundReg = findValueFromDefImpl(DefReg, StartBit, Size);
+ Register FoundReg = findValueFromDefImpl(DefReg, StartBit, Size, DstTy);
return FoundReg != DefReg ? FoundReg : Register();
}
@@ -904,7 +913,8 @@ public:
DeadDefs[DefIdx] = true;
continue;
}
- Register FoundVal = findValueFromDef(DefReg, 0, DestTy.getSizeInBits());
+ Register FoundVal =
+ findValueFromDef(DefReg, 0, DestTy.getSizeInBits(), DestTy);
if (!FoundVal)
continue;
if (MRI.getType(FoundVal) != DestTy)
@@ -923,7 +933,7 @@ public:
GUnmerge *findUnmergeThatDefinesReg(Register Reg, unsigned Size,
unsigned &DefOperandIdx) {
- if (Register Def = findValueFromDefImpl(Reg, 0, Size)) {
+ if (Register Def = findValueFromDefImpl(Reg, 0, Size, MRI.getType(Reg))) {
if (auto *Unmerge = dyn_cast<GUnmerge>(MRI.getVRegDef(Def))) {
DefOperandIdx =
Unmerge->findRegisterDefOperandIdx(Def, /*TRI=*/nullptr);
@@ -1288,12 +1298,19 @@ public:
// for N >= %2.getSizeInBits() / 2
// %3 = G_EXTRACT %1, (N - %0.getSizeInBits()
+ Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
MachineInstr *MergeI = MRI.getVRegDef(SrcReg);
+ if (MergeI && MergeI->getOpcode() == TargetOpcode::G_IMPLICIT_DEF) {
+ Builder.setInstrAndDebugLoc(MI);
+ Builder.buildUndef(DstReg);
+ UpdatedDefs.push_back(DstReg);
+ markInstAndDefDead(MI, *MergeI, DeadInsts);
+ return true;
+ }
if (!MergeI || !isa<GMergeLikeInstr>(MergeI))
return false;
- Register DstReg = MI.getOperand(0).getReg();
LLT DstTy = MRI.getType(DstReg);
LLT SrcTy = MRI.getType(SrcReg);
diff --git a/llvm/include/llvm/CodeGen/RuntimeLibcallUtil.h b/llvm/include/llvm/CodeGen/RuntimeLibcallUtil.h
index a9e53ba..f980d3d 100644
--- a/llvm/include/llvm/CodeGen/RuntimeLibcallUtil.h
+++ b/llvm/include/llvm/CodeGen/RuntimeLibcallUtil.h
@@ -84,6 +84,10 @@ LLVM_ABI Libcall getSINCOS(EVT RetVT);
/// UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getSINCOSPI(EVT RetVT);
+/// Return the SINCOS_STRET_ value for the given types, or UNKNOWN_LIBCALL if
+/// there is none.
+LLVM_ABI Libcall getSINCOS_STRET(EVT RetVT);
+
/// getMODF - Return the MODF_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getMODF(EVT RetVT);
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index d6ed3a8..4058dd7 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1433,9 +1433,9 @@ public:
/// \p High as its lowest and highest case values, and expects \p NumCmps
/// case value comparisons. Check if the number of destinations, comparison
/// metric, and range are all suitable.
- bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps,
- const APInt &Low, const APInt &High,
- const DataLayout &DL) const {
+ bool isSuitableForBitTests(
+ const DenseMap<const BasicBlock *, unsigned int> &DestCmps,
+ const APInt &Low, const APInt &High, const DataLayout &DL) const {
// FIXME: I don't think NumCmps is the correct metric: a single case and a
// range of cases both require only one branch to lower. Just looking at the
// number of clusters and destinations should be enough to decide whether to
@@ -1446,6 +1446,20 @@ public:
if (!rangeFitsInWord(Low, High, DL))
return false;
+ unsigned NumDests = DestCmps.size();
+ unsigned NumCmps = 0;
+ unsigned int MaxBitTestEntry = 0;
+ for (auto &DestCmp : DestCmps) {
+ NumCmps += DestCmp.second;
+ if (DestCmp.second > MaxBitTestEntry)
+ MaxBitTestEntry = DestCmp.second;
+ }
+
+ // Comparisons might be cheaper for small number of comparisons, which can
+ // be Arch Target specific.
+ if (MaxBitTestEntry < getMinimumBitTestCmps())
+ return false;
+
// Decide whether it's profitable to lower this range with bit tests. Each
// destination requires a bit test and branch, and there is an overall range
// check branch. For a small number of clusters, separate comparisons might
@@ -2055,6 +2069,9 @@ public:
virtual bool isJumpTableRelative() const;
+ /// Return the minimum of largest number of comparisons in BitTest.
+ unsigned getMinimumBitTestCmps() const;
+
/// If a physical register, this specifies the register that
/// llvm.savestack/llvm.restorestack should save and restore.
Register getStackPointerRegisterToSaveRestore() const {
@@ -2577,6 +2594,9 @@ protected:
/// Set to zero to generate unlimited jump tables.
void setMaximumJumpTableSize(unsigned);
+ /// Set the minimum of largest number of comparisons to generate BitTest.
+ void setMinimumBitTestCmps(unsigned Val);
+
/// If set to a physical register, this specifies the register that
/// llvm.savestack/llvm.restorestack should save and restore.
void setStackPointerRegisterToSaveRestore(Register R) {
@@ -3719,6 +3739,9 @@ private:
/// backend supports.
unsigned MinCmpXchgSizeInBits;
+ /// The minimum of largest number of comparisons to use bit test for switch.
+ unsigned MinimumBitTestCmps;
+
/// This indicates if the target supports unaligned atomic operations.
bool SupportsUnalignedAtomics;
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
index 7f9bf12..be78647 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -143,7 +143,11 @@ public:
decltype(make_filter_range(std::declval<iterator_range>(), isCompileUnit));
LLVM_ABI DWARFUnit *getUnitForOffset(uint64_t Offset) const;
- LLVM_ABI DWARFUnit *getUnitForIndexEntry(const DWARFUnitIndex::Entry &E);
+ /// Returns the Unit from the .debug_info or .debug_types section by the index
+ /// entry.
+ LLVM_ABI DWARFUnit *
+ getUnitForIndexEntry(const DWARFUnitIndex::Entry &E, DWARFSectionKind Sec,
+ const DWARFSection *Section = nullptr);
/// Read units from a .debug_info or .debug_types section. Calls made
/// before finishedInfoUnits() are assumed to be for .debug_info sections,
diff --git a/llvm/include/llvm/IR/AbstractCallSite.h b/llvm/include/llvm/IR/AbstractCallSite.h
index 9e24ae7..f431e1d 100644
--- a/llvm/include/llvm/IR/AbstractCallSite.h
+++ b/llvm/include/llvm/IR/AbstractCallSite.h
@@ -137,7 +137,7 @@ public:
/// Return true if @p U is the use that defines the callee of this ACS.
bool isCallee(const Use *U) const {
- if (isDirectCall())
+ if (!isCallbackCall())
return CB->isCallee(U);
assert(!CI.ParameterEncoding.empty() &&
@@ -154,7 +154,7 @@ public:
/// Return the number of parameters of the callee.
unsigned getNumArgOperands() const {
- if (isDirectCall())
+ if (!isCallbackCall())
return CB->arg_size();
// Subtract 1 for the callee encoding.
return CI.ParameterEncoding.size() - 1;
@@ -169,7 +169,7 @@ public:
/// Return the operand index of the underlying instruction associated with
/// the function parameter number @p ArgNo or -1 if there is none.
int getCallArgOperandNo(unsigned ArgNo) const {
- if (isDirectCall())
+ if (!isCallbackCall())
return ArgNo;
// Add 1 for the callee encoding.
return CI.ParameterEncoding[ArgNo + 1];
@@ -183,7 +183,7 @@ public:
/// Return the operand of the underlying instruction associated with the
/// function parameter number @p ArgNo or nullptr if there is none.
Value *getCallArgOperand(unsigned ArgNo) const {
- if (isDirectCall())
+ if (!isCallbackCall())
return CB->getArgOperand(ArgNo);
// Add 1 for the callee encoding.
return CI.ParameterEncoding[ArgNo + 1] >= 0
@@ -210,7 +210,7 @@ public:
/// Return the pointer to function that is being called.
Value *getCalledOperand() const {
- if (isDirectCall())
+ if (!isCallbackCall())
return CB->getCalledOperand();
return CB->getArgOperand(getCallArgOperandNoForCallee());
}
diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h
index dacda0a..972a253 100644
--- a/llvm/include/llvm/IR/IRBuilder.h
+++ b/llvm/include/llvm/IR/IRBuilder.h
@@ -2191,7 +2191,7 @@ public:
FMFSource);
}
Value *CreatePtrToAddr(Value *V, const Twine &Name = "") {
- return CreateCast(Instruction::PtrToInt, V,
+ return CreateCast(Instruction::PtrToAddr, V,
BB->getDataLayout().getAddressType(V->getType()), Name);
}
Value *CreatePtrToInt(Value *V, Type *DestTy,
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 5fb7e63..431a810 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2400,10 +2400,11 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
Results.push_back(Rem);
}
-/// Return true if sincos libcall is available.
+/// Return true if sincos or __sincos_stret libcall is available.
static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) {
- RTLIB::Libcall LC = RTLIB::getSINCOS(Node->getSimpleValueType(0).SimpleTy);
- return TLI.getLibcallName(LC) != nullptr;
+ MVT::SimpleValueType VT = Node->getSimpleValueType(0).SimpleTy;
+ return TLI.getLibcallImpl(RTLIB::getSINCOS(VT)) != RTLIB::Unsupported ||
+ TLI.getLibcallImpl(RTLIB::getSINCOS_STRET(VT)) != RTLIB::Unsupported;
}
/// Only issue sincos libcall if both sin and cos are needed.
@@ -3752,9 +3753,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
EVT VT = Node->getValueType(0);
// Turn fsin / fcos into ISD::FSINCOS node if there are a pair of fsin /
// fcos which share the same operand and both are used.
- if ((TLI.isOperationLegalOrCustom(ISD::FSINCOS, VT) ||
- isSinCosLibcallAvailable(Node, TLI))
- && useSinCos(Node)) {
+ if ((TLI.isOperationLegal(ISD::FSINCOS, VT) ||
+ isSinCosLibcallAvailable(Node, TLI)) &&
+ useSinCos(Node)) {
SDVTList VTs = DAG.getVTList(VT, VT);
Tmp1 = DAG.getNode(ISD::FSINCOS, dl, VTs, Node->getOperand(0));
if (Node->getOpcode() == ISD::FCOS)
diff --git a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
index 038c499..3fa8243 100644
--- a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
+++ b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
@@ -198,7 +198,6 @@ bool SwitchCG::SwitchLowering::buildJumpTable(const CaseClusterVector &Clusters,
assert(First <= Last);
auto Prob = BranchProbability::getZero();
- unsigned NumCmps = 0;
std::vector<MachineBasicBlock*> Table;
DenseMap<MachineBasicBlock*, BranchProbability> JTProbs;
@@ -206,12 +205,16 @@ bool SwitchCG::SwitchLowering::buildJumpTable(const CaseClusterVector &Clusters,
for (unsigned I = First; I <= Last; ++I)
JTProbs[Clusters[I].MBB] = BranchProbability::getZero();
+ DenseMap<const BasicBlock *, unsigned int> DestMap;
for (unsigned I = First; I <= Last; ++I) {
assert(Clusters[I].Kind == CC_Range);
Prob += Clusters[I].Prob;
const APInt &Low = Clusters[I].Low->getValue();
const APInt &High = Clusters[I].High->getValue();
- NumCmps += (Low == High) ? 1 : 2;
+ unsigned int NumCmp = (Low == High) ? 1 : 2;
+ const BasicBlock *BB = Clusters[I].MBB->getBasicBlock();
+ DestMap[BB] += NumCmp;
+
if (I != First) {
// Fill the gap between this and the previous cluster.
const APInt &PreviousHigh = Clusters[I - 1].High->getValue();
@@ -226,9 +229,7 @@ bool SwitchCG::SwitchLowering::buildJumpTable(const CaseClusterVector &Clusters,
JTProbs[Clusters[I].MBB] += Clusters[I].Prob;
}
- unsigned NumDests = JTProbs.size();
- if (TLI->isSuitableForBitTests(NumDests, NumCmps,
- Clusters[First].Low->getValue(),
+ if (TLI->isSuitableForBitTests(DestMap, Clusters[First].Low->getValue(),
Clusters[Last].High->getValue(), *DL)) {
// Clusters[First..Last] should be lowered as bit tests instead.
return false;
@@ -372,20 +373,19 @@ bool SwitchCG::SwitchLowering::buildBitTests(CaseClusterVector &Clusters,
if (First == Last)
return false;
- BitVector Dests(FuncInfo.MF->getNumBlockIDs());
- unsigned NumCmps = 0;
+ DenseMap<const BasicBlock *, unsigned int> DestMap;
for (int64_t I = First; I <= Last; ++I) {
assert(Clusters[I].Kind == CC_Range);
- Dests.set(Clusters[I].MBB->getNumber());
- NumCmps += (Clusters[I].Low == Clusters[I].High) ? 1 : 2;
+ unsigned NumCmp = (Clusters[I].Low == Clusters[I].High) ? 1 : 2;
+ const BasicBlock *BB = Clusters[I].MBB->getBasicBlock();
+ DestMap[BB] += NumCmp;
}
- unsigned NumDests = Dests.count();
APInt Low = Clusters[First].Low->getValue();
APInt High = Clusters[Last].High->getValue();
assert(Low.slt(High));
- if (!TLI->isSuitableForBitTests(NumDests, NumCmps, Low, High, *DL))
+ if (!TLI->isSuitableForBitTests(DestMap, Low, High, *DL))
return false;
APInt LowBound;
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 59798b3..b3535eac 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
@@ -90,6 +91,11 @@ static cl::opt<unsigned> OptsizeJumpTableDensity(
cl::desc("Minimum density for building a jump table in "
"an optsize function"));
+static cl::opt<unsigned> MinimumBitTestCmpsOverride(
+ "min-bit-test-cmps", cl::init(2), cl::Hidden,
+ cl::desc("Set minimum of largest number of comparisons "
+ "to use bit test for switch."));
+
// FIXME: This option is only to test if the strict fp operation processed
// correctly by preventing mutating strict fp operation to normal fp operation
// during development. When the backend supports strict float operation, this
@@ -428,6 +434,11 @@ RTLIB::Libcall RTLIB::getSINCOSPI(EVT RetVT) {
SINCOSPI_F128, SINCOSPI_PPCF128);
}
+RTLIB::Libcall RTLIB::getSINCOS_STRET(EVT RetVT) {
+ return getFPLibCall(RetVT, SINCOS_STRET_F32, SINCOS_STRET_F64,
+ UNKNOWN_LIBCALL, UNKNOWN_LIBCALL, UNKNOWN_LIBCALL);
+}
+
RTLIB::Libcall RTLIB::getMODF(EVT RetVT) {
return getFPLibCall(RetVT, MODF_F32, MODF_F64, MODF_F80, MODF_F128,
MODF_PPCF128);
@@ -719,6 +730,8 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm)
MinCmpXchgSizeInBits = 0;
SupportsUnalignedAtomics = false;
+
+ MinimumBitTestCmps = MinimumBitTestCmpsOverride;
}
// Define the virtual destructor out-of-line to act as a key method to anchor
@@ -2129,6 +2142,14 @@ bool TargetLoweringBase::isJumpTableRelative() const {
return getTargetMachine().isPositionIndependent();
}
+unsigned TargetLoweringBase::getMinimumBitTestCmps() const {
+ return MinimumBitTestCmps;
+}
+
+void TargetLoweringBase::setMinimumBitTestCmps(unsigned Val) {
+ MinimumBitTestCmps = Val;
+}
+
Align TargetLoweringBase::getPrefLoopAlignment(MachineLoop *ML) const {
if (TM.Options.LoopAlignment)
return Align(TM.Options.LoopAlignment);
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
index 73df62a..41cea45 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -1344,9 +1344,20 @@ void DWARFContext::dump(
DWARFTypeUnit *DWARFContext::getTypeUnitForHash(uint64_t Hash, bool IsDWO) {
DWARFUnitVector &DWOUnits = State->getDWOUnits();
if (const auto &TUI = getTUIndex()) {
- if (const auto *R = TUI.getFromHash(Hash))
- return dyn_cast_or_null<DWARFTypeUnit>(
- DWOUnits.getUnitForIndexEntry(*R));
+ if (const auto *R = TUI.getFromHash(Hash)) {
+ if (TUI.getVersion() >= 5) {
+ return dyn_cast_or_null<DWARFTypeUnit>(
+ DWOUnits.getUnitForIndexEntry(*R, DW_SECT_INFO));
+ } else {
+ DWARFUnit *TypesUnit = nullptr;
+ getDWARFObj().forEachTypesDWOSections([&](const DWARFSection &S) {
+ if (!TypesUnit)
+ TypesUnit =
+ DWOUnits.getUnitForIndexEntry(*R, DW_SECT_EXT_TYPES, &S);
+ });
+ return dyn_cast_or_null<DWARFTypeUnit>(TypesUnit);
+ }
+ }
return nullptr;
}
return State->getTypeUnitMap(IsDWO).lookup(Hash);
@@ -1358,7 +1369,7 @@ DWARFCompileUnit *DWARFContext::getDWOCompileUnitForHash(uint64_t Hash) {
if (const auto &CUI = getCUIndex()) {
if (const auto *R = CUI.getFromHash(Hash))
return dyn_cast_or_null<DWARFCompileUnit>(
- DWOUnits.getUnitForIndexEntry(*R));
+ DWOUnits.getUnitForIndexEntry(*R, DW_SECT_INFO));
return nullptr;
}
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
index ef59c82..da0bf03 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -161,17 +161,24 @@ DWARFUnit *DWARFUnitVector::getUnitForOffset(uint64_t Offset) const {
return nullptr;
}
-DWARFUnit *
-DWARFUnitVector::getUnitForIndexEntry(const DWARFUnitIndex::Entry &E) {
- const auto *CUOff = E.getContribution(DW_SECT_INFO);
+DWARFUnit *DWARFUnitVector::getUnitForIndexEntry(const DWARFUnitIndex::Entry &E,
+ DWARFSectionKind Sec,
+ const DWARFSection *Section) {
+ const auto *CUOff = E.getContribution(Sec);
if (!CUOff)
return nullptr;
uint64_t Offset = CUOff->getOffset();
- auto end = begin() + getNumInfoUnits();
+ auto begin = this->begin();
+ auto end = begin + getNumInfoUnits();
+
+ if (Sec == DW_SECT_EXT_TYPES) {
+ begin = end;
+ end = this->end();
+ }
auto *CU =
- std::upper_bound(begin(), end, CUOff->getOffset(),
+ std::upper_bound(begin, end, CUOff->getOffset(),
[](uint64_t LHS, const std::unique_ptr<DWARFUnit> &RHS) {
return LHS < RHS->getNextUnitOffset();
});
@@ -181,13 +188,14 @@ DWARFUnitVector::getUnitForIndexEntry(const DWARFUnitIndex::Entry &E) {
if (!Parser)
return nullptr;
- auto U = Parser(Offset, DW_SECT_INFO, nullptr, &E);
+ auto U = Parser(Offset, Sec, Section, &E);
if (!U)
return nullptr;
auto *NewCU = U.get();
this->insert(CU, std::move(U));
- ++NumInfoUnits;
+ if (Sec == DW_SECT_INFO)
+ ++NumInfoUnits;
return NewCU;
}
diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h
index 8d0ff41..1396841 100644
--- a/llvm/lib/Target/AArch64/AArch64.h
+++ b/llvm/lib/Target/AArch64/AArch64.h
@@ -60,7 +60,7 @@ FunctionPass *createAArch64CleanupLocalDynamicTLSPass();
FunctionPass *createAArch64CollectLOHPass();
FunctionPass *createSMEABIPass();
FunctionPass *createSMEPeepholeOptPass();
-FunctionPass *createMachineSMEABIPass();
+FunctionPass *createMachineSMEABIPass(CodeGenOptLevel);
ModulePass *createSVEIntrinsicOptsPass();
InstructionSelector *
createAArch64InstructionSelector(const AArch64TargetMachine &,
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index a4529a5..0f457c2 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -133,6 +133,8 @@ include "AArch64SchedNeoverseN2.td"
include "AArch64SchedNeoverseN3.td"
include "AArch64SchedNeoverseV1.td"
include "AArch64SchedNeoverseV2.td"
+include "AArch64SchedNeoverseV3.td"
+include "AArch64SchedNeoverseV3AE.td"
include "AArch64SchedOryon.td"
include "AArch64Processors.td"
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index d16b116..60aa61e 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -9028,11 +9028,12 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
CallingConv::ID CallerCC = CallerF.getCallingConv();
// SME Streaming functions are not eligible for TCO as they may require
- // the streaming mode or ZA to be restored after returning from the call.
+ // the streaming mode or ZA/ZT0 to be restored after returning from the call.
SMECallAttrs CallAttrs =
getSMECallAttrs(CallerF, getRuntimeLibcallsInfo(), CLI);
if (CallAttrs.requiresSMChange() || CallAttrs.requiresLazySave() ||
CallAttrs.requiresPreservingAllZAState() ||
+ CallAttrs.requiresPreservingZT0() ||
CallAttrs.caller().hasStreamingBody())
return false;
diff --git a/llvm/lib/Target/AArch64/AArch64Processors.td b/llvm/lib/Target/AArch64/AArch64Processors.td
index 81f5d07..11387bb 100644
--- a/llvm/lib/Target/AArch64/AArch64Processors.td
+++ b/llvm/lib/Target/AArch64/AArch64Processors.td
@@ -1272,11 +1272,11 @@ def : ProcessorModel<"cortex-x2", NeoverseV2Model, ProcessorFeatures.X2,
[TuneX2]>;
def : ProcessorModel<"cortex-x3", NeoverseV2Model, ProcessorFeatures.X3,
[TuneX3]>;
-def : ProcessorModel<"cortex-x4", NeoverseV2Model, ProcessorFeatures.X4,
+def : ProcessorModel<"cortex-x4", NeoverseV3Model, ProcessorFeatures.X4,
[TuneX4]>;
-def : ProcessorModel<"cortex-x925", NeoverseV2Model, ProcessorFeatures.X925,
+def : ProcessorModel<"cortex-x925", NeoverseV3Model, ProcessorFeatures.X925,
[TuneX925]>;
-def : ProcessorModel<"gb10", NeoverseV2Model, ProcessorFeatures.GB10,
+def : ProcessorModel<"gb10", NeoverseV3Model, ProcessorFeatures.GB10,
[TuneX925]>;
def : ProcessorModel<"grace", NeoverseV2Model, ProcessorFeatures.Grace,
[TuneNeoverseV2]>;
@@ -1295,9 +1295,9 @@ def : ProcessorModel<"neoverse-v1", NeoverseV1Model,
ProcessorFeatures.NeoverseV1, [TuneNeoverseV1]>;
def : ProcessorModel<"neoverse-v2", NeoverseV2Model,
ProcessorFeatures.NeoverseV2, [TuneNeoverseV2]>;
-def : ProcessorModel<"neoverse-v3", NeoverseV2Model,
+def : ProcessorModel<"neoverse-v3", NeoverseV3Model,
ProcessorFeatures.NeoverseV3, [TuneNeoverseV3]>;
-def : ProcessorModel<"neoverse-v3ae", NeoverseV2Model,
+def : ProcessorModel<"neoverse-v3ae", NeoverseV3AEModel,
ProcessorFeatures.NeoverseV3AE, [TuneNeoverseV3AE]>;
def : ProcessorModel<"exynos-m3", ExynosM3Model, ProcessorFeatures.ExynosM3,
[TuneExynosM3]>;
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV3.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV3.td
new file mode 100644
index 0000000..e23576a
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV3.td
@@ -0,0 +1,2777 @@
+//=- AArch64SchedNeoverseV3.td - NeoverseV3 Scheduling Defs --*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the scheduling model for the Arm Neoverse V3 processors.
+// All information is taken from the V3 Software Optimization guide:
+//
+// https://developer.arm.com/documentation/109678/300/?lang=en
+//
+//===----------------------------------------------------------------------===//
+
+def NeoverseV3Model : SchedMachineModel { // Selected by the cortex-x4, cortex-x925, gb10 and neoverse-v3 ProcessorModel entries in AArch64Processors.td.
+ let IssueWidth = 10; // Estimate: expect best value to be slightly higher than V2
+ let MicroOpBufferSize = 320; // Entries in micro-op re-order buffer. NOTE: Copied from Neoverse-V2
+ let LoadLatency = 4; // Optimistic load latency.
+ let MispredictPenalty = 10; // Extra cycles for mispredicted branch. NOTE: Copied from N2.
+ let LoopMicroOpBufferSize = 16; // NOTE: Copied from Cortex-A57.
+ let CompleteModel = 1; // NOTE(review): presumably requires every supported instruction to have scheduling info — confirm against TargetSchedule.td
+
+ list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F, // SME predicates, plus the four listed below
+ [HasSVE2p1, HasSVEB16B16,
+ HasCPA, HasCSSC]);
+}
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available on Neoverse V3.
+// Instructions are first fetched and then decoded into internal macro-ops
+// (MOPs). From there, the MOPs proceed through register renaming and dispatch
+// stages. A MOP can be split into two micro-ops further down the pipeline
+// after the decode stage. Once dispatched, micro-ops wait for their operands
+// and issue out-of-order to one of twenty-one issue pipelines. Each issue
+// pipeline can accept one micro-op per cycle.
+
+let SchedModel = NeoverseV3Model in {
+
+// Define the (21) issue ports.
+def V3UnitB : ProcResource<3>; // Branch 0/1/2
+def V3UnitS0 : ProcResource<1>; // Integer single-cycle 0
+def V3UnitS1 : ProcResource<1>; // Integer single-cycle 1
+def V3UnitS2 : ProcResource<1>; // Integer single-cycle 2
+def V3UnitS3 : ProcResource<1>; // Integer single-cycle 3
+def V3UnitS4 : ProcResource<1>; // Integer single-cycle 4
+def V3UnitS5 : ProcResource<1>; // Integer single-cycle 5
+def V3UnitM0 : ProcResource<1>; // Integer single/multicycle 0
+def V3UnitM1 : ProcResource<1>; // Integer single/multicycle 1
+def V3UnitV0 : ProcResource<1>; // FP/ASIMD 0
+def V3UnitV1 : ProcResource<1>; // FP/ASIMD 1
+def V3UnitV2 : ProcResource<1>; // FP/ASIMD 2
+def V3UnitV3 : ProcResource<1>; // FP/ASIMD 3
+def V3UnitLS0 : ProcResource<1>; // Load/Store 0
+def V3UnitL12 : ProcResource<2>; // Load 1/2
+def V3UnitST1 : ProcResource<1>; // Store 1
+def V3UnitD : ProcResource<2>; // Store data 0/1
+def V3UnitFlg : ProcResource<4>; // Flags; not counted among the 21 issue ports (3+6+2+4+1+2+1+2 = 21 without it)
+
+def V3UnitS : ProcResGroup<[V3UnitS0, V3UnitS1, V3UnitS2, V3UnitS3, V3UnitS4, V3UnitS5]>; // Integer single-cycle 0/1/2/3/4/5
+def V3UnitI : ProcResGroup<[V3UnitS0, V3UnitS1, V3UnitS2, V3UnitS3, V3UnitS4, V3UnitS5, V3UnitM0, V3UnitM1]>; // Integer single-cycle 0/1/2/3/4/5 and single/multicycle 0/1
+def V3UnitM : ProcResGroup<[V3UnitM0, V3UnitM1]>; // Integer single/multicycle 0/1
+def V3UnitLSA : ProcResGroup<[V3UnitLS0, V3UnitL12, V3UnitST1]>; // Supergroup of L+SA: Load/Store 0, Load 1/2 and Store 1
+def V3UnitL : ProcResGroup<[V3UnitLS0, V3UnitL12]>; // Load/Store 0 and Load 1/2
+def V3UnitSA : ProcResGroup<[V3UnitLS0, V3UnitST1]>; // Load/Store 0 and Store 1
+def V3UnitV : ProcResGroup<[V3UnitV0, V3UnitV1, V3UnitV2, V3UnitV3]>; // FP/ASIMD 0/1/2/3
+def V3UnitV01 : ProcResGroup<[V3UnitV0, V3UnitV1]>; // FP/ASIMD 0/1
+def V3UnitV02 : ProcResGroup<[V3UnitV0, V3UnitV2]>; // FP/ASIMD 0/2
+def V3UnitV13 : ProcResGroup<[V3UnitV1, V3UnitV3]>; // FP/ASIMD 1/3
+
+// Define commonly used read types.
+
+// No forwarding is provided for these types.
+def : ReadAdvance<ReadI, 0>; // every read type in this list uses an advance of 0 cycles
+def : ReadAdvance<ReadISReg, 0>;
+def : ReadAdvance<ReadIEReg, 0>;
+def : ReadAdvance<ReadIM, 0>;
+def : ReadAdvance<ReadIMA, 0>;
+def : ReadAdvance<ReadID, 0>;
+def : ReadAdvance<ReadExtrHi, 0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadST, 0>;
+def : ReadAdvance<ReadVLD, 0>;
+
+// NOTE: Copied from N2.
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; } // no resources; flagged unsupported in this model
+def : WriteRes<WriteBarrier, []> { let Latency = 1; } // no resources, 1-cycle latency
+def : WriteRes<WriteHint, []> { let Latency = 1; } // no resources, 1-cycle latency
+def : WriteRes<WriteLDHi, []> { let Latency = 4; } // no resources; latency equals the model's LoadLatency of 4
+
+//===----------------------------------------------------------------------===//
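+// Define customized scheduler read/write types specific to the Neoverse V3.
+// Names follow V3Write_<latency>c_<count><unit>[_<count><unit>...]: for example
+// V3Write_6c_2V has Latency = 6 and uses two V3UnitV resources (NumMicroOps = 2).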
+
+//===----------------------------------------------------------------------===//
+
+// Define generic 0 micro-op types
+def V3Write_0c : SchedWriteRes<[]> { let Latency = 0; } // zero latency, empty resource list; used by the NeoverseZeroMove variants
+
+// Define generic 1 micro-op types
+
+def V3Write_1c_1B : SchedWriteRes<[V3UnitB]> { let Latency = 1; }
+def V3Write_1c_1F_1Flg : SchedWriteRes<[V3UnitI, V3UnitFlg]> { let Latency = 1; } // NOTE(review): name says "1F" but the resource is V3UnitI; no V3UnitF appears among this model's resources — confirm intended
+def V3Write_1c_1I : SchedWriteRes<[V3UnitI]> { let Latency = 1; }
+def V3Write_1c_1M : SchedWriteRes<[V3UnitM]> { let Latency = 1; }
+def V3Write_1c_1SA : SchedWriteRes<[V3UnitSA]> { let Latency = 1; }
+def V3Write_2c_1M : SchedWriteRes<[V3UnitM]> { let Latency = 2; }
+def V3Write_2c_1M_1Flg : SchedWriteRes<[V3UnitM, V3UnitFlg]> { let Latency = 2; }
+def V3Write_3c_1M : SchedWriteRes<[V3UnitM]> { let Latency = 3; }
+def V3Write_2c_1M0 : SchedWriteRes<[V3UnitM0]> { let Latency = 2; }
+def V3Write_3c_1M0 : SchedWriteRes<[V3UnitM0]> { let Latency = 3; }
+def V3Write_4c_1M0 : SchedWriteRes<[V3UnitM0]> { let Latency = 4; }
+def V3Write_12c_1M0 : SchedWriteRes<[V3UnitM0]> { let Latency = 12;
+ let ReleaseAtCycles = [12]; } // M0 is held for the full 12 cycles
+def V3Write_20c_1M0 : SchedWriteRes<[V3UnitM0]> { let Latency = 20;
+ let ReleaseAtCycles = [20]; } // M0 is held for the full 20 cycles
+def V3Write_4c_1L : SchedWriteRes<[V3UnitL]> { let Latency = 4; }
+def V3Write_6c_1L : SchedWriteRes<[V3UnitL]> { let Latency = 6; }
+def V3Write_2c_1V : SchedWriteRes<[V3UnitV]> { let Latency = 2; }
+def V3Write_2c_1V0 : SchedWriteRes<[V3UnitV0]> { let Latency = 2; }
+def V3Write_3c_1V : SchedWriteRes<[V3UnitV]> { let Latency = 3; }
+def V3Write_3c_1V01 : SchedWriteRes<[V3UnitV01]> { let Latency = 3;
+ let ReleaseAtCycles = [2]; } // V01 is held for 2 cycles while the result latency is 3
+def V3Write_4c_1V : SchedWriteRes<[V3UnitV]> { let Latency = 4; }
+def V3Write_5c_1V : SchedWriteRes<[V3UnitV]> { let Latency = 5; }
+def V3Write_6c_1V : SchedWriteRes<[V3UnitV]> { let Latency = 6; }
+def V3Write_12c_1V : SchedWriteRes<[V3UnitV]> { let Latency = 12; }
+def V3Write_3c_1V0 : SchedWriteRes<[V3UnitV0]> { let Latency = 3; }
+def V3Write_3c_1V02 : SchedWriteRes<[V3UnitV02]> { let Latency = 3; }
+def V3Write_4c_1V0 : SchedWriteRes<[V3UnitV0]> { let Latency = 4; }
+def V3Write_4c_1V02 : SchedWriteRes<[V3UnitV02]> { let Latency = 4; }
+def V3Write_9c_1V0 : SchedWriteRes<[V3UnitV0]> { let Latency = 9; }
+def V3Write_10c_1V0 : SchedWriteRes<[V3UnitV0]> { let Latency = 10; }
+def V3Write_8c_1V1 : SchedWriteRes<[V3UnitV1]> { let Latency = 8; }
+def V3Write_12c_1V0 : SchedWriteRes<[V3UnitV0]> { let Latency = 12;
+ let ReleaseAtCycles = [11]; } // V0 is held for 11 of the 12 latency cycles
+def V3Write_13c_1V0 : SchedWriteRes<[V3UnitV0]> { let Latency = 13; }
+def V3Write_15c_1V0 : SchedWriteRes<[V3UnitV0]> { let Latency = 15; }
+def V3Write_13c_1V1 : SchedWriteRes<[V3UnitV1]> { let Latency = 13; }
+def V3Write_16c_1V0 : SchedWriteRes<[V3UnitV0]> { let Latency = 16; }
+def V3Write_16c_1V02 : SchedWriteRes<[V3UnitV02]> { let Latency = 16;
+ let ReleaseAtCycles = [8]; } // V02 is held for 8 of the 16 latency cycles
+def V3Write_20c_1V0 : SchedWriteRes<[V3UnitV0]> { let Latency = 20;
+ let ReleaseAtCycles = [20]; } // V0 is held for the full 20 cycles
+def V3Write_2c_1V1 : SchedWriteRes<[V3UnitV1]> { let Latency = 2; }
+def V3Write_2c_1V13 : SchedWriteRes<[V3UnitV13]> { let Latency = 2; }
+def V3Write_3c_1V1 : SchedWriteRes<[V3UnitV1]> { let Latency = 3; }
+def V3Write_3c_1V13 : SchedWriteRes<[V3UnitV13]> { let Latency = 3; }
+def V3Write_4c_1V1 : SchedWriteRes<[V3UnitV1]> { let Latency = 4; }
+def V3Write_6c_1V1 : SchedWriteRes<[V3UnitV1]> { let Latency = 6; }
+def V3Write_10c_1V1 : SchedWriteRes<[V3UnitV1]> { let Latency = 10; }
+def V3Write_6c_1SA : SchedWriteRes<[V3UnitSA]> { let Latency = 6; }
+
+//===----------------------------------------------------------------------===//
+// Define generic 2 micro-op types
+
+def V3Write_1c_1B_1S : SchedWriteRes<[V3UnitB, V3UnitS]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+def V3Write_6c_1M0_1B : SchedWriteRes<[V3UnitM0, V3UnitB]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def V3Write_9c_1M0_1L : SchedWriteRes<[V3UnitM0, V3UnitL]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+}
+
+def V3Write_3c_1I_1M : SchedWriteRes<[V3UnitI, V3UnitM]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+def V3Write_1c_2M : SchedWriteRes<[V3UnitM, V3UnitM]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+def V3Write_3c_2M : SchedWriteRes<[V3UnitM, V3UnitM]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+def V3Write_4c_2M : SchedWriteRes<[V3UnitM, V3UnitM]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+def V3Write_5c_1L_1I : SchedWriteRes<[V3UnitL, V3UnitI]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def V3Write_6c_1I_1L : SchedWriteRes<[V3UnitI, V3UnitL]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def V3Write_7c_1I_1L : SchedWriteRes<[V3UnitI, V3UnitL]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
+
+def V3Write_1c_1SA_1D : SchedWriteRes<[V3UnitSA, V3UnitD]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+def V3Write_5c_1M0_1V : SchedWriteRes<[V3UnitM0, V3UnitV]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def V3Write_2c_1SA_1V01 : SchedWriteRes<[V3UnitSA, V3UnitV01]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def V3Write_2c_2V01 : SchedWriteRes<[V3UnitV01, V3UnitV01]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def V3Write_4c_1SA_1V01 : SchedWriteRes<[V3UnitSA, V3UnitV01]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+def V3Write_5c_1V13_1V : SchedWriteRes<[V3UnitV13, V3UnitV]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def V3Write_4c_2V0 : SchedWriteRes<[V3UnitV0, V3UnitV0]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+def V3Write_4c_2V02 : SchedWriteRes<[V3UnitV02, V3UnitV02]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+def V3Write_4c_2V : SchedWriteRes<[V3UnitV, V3UnitV]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+def V3Write_6c_2V : SchedWriteRes<[V3UnitV, V3UnitV]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def V3Write_6c_2L : SchedWriteRes<[V3UnitL, V3UnitL]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def V3Write_8c_1L_1V : SchedWriteRes<[V3UnitL, V3UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+
+def V3Write_4c_1SA_1V : SchedWriteRes<[V3UnitSA, V3UnitV]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+def V3Write_3c_1M0_1M : SchedWriteRes<[V3UnitM0, V3UnitM]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+def V3Write_4c_1M0_1M : SchedWriteRes<[V3UnitM0, V3UnitM]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+def V3Write_1c_1M0_1M : SchedWriteRes<[V3UnitM0, V3UnitM]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+def V3Write_2c_1M0_1M : SchedWriteRes<[V3UnitM0, V3UnitM]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def V3Write_6c_2V1 : SchedWriteRes<[V3UnitV1, V3UnitV1]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def V3Write_5c_2V0 : SchedWriteRes<[V3UnitV0, V3UnitV0]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def V3Write_5c_2V02 : SchedWriteRes<[V3UnitV02, V3UnitV02]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def V3Write_5c_1V1_1M0 : SchedWriteRes<[V3UnitV1, V3UnitM0]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def V3Write_6c_1V1_1M0 : SchedWriteRes<[V3UnitV1, V3UnitM0]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def V3Write_7c_1M0_1V02 : SchedWriteRes<[V3UnitM0, V3UnitV02]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
+
+def V3Write_2c_1V0_1M : SchedWriteRes<[V3UnitV0, V3UnitM]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def V3Write_3c_1V0_1M : SchedWriteRes<[V3UnitV0, V3UnitM]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+def V3Write_6c_1V_1V13 : SchedWriteRes<[V3UnitV, V3UnitV13]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def V3Write_6c_1L_1M : SchedWriteRes<[V3UnitL, V3UnitM]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def V3Write_6c_1L_1I : SchedWriteRes<[V3UnitL, V3UnitI]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def V3Write_6c_2V13 : SchedWriteRes<[V3UnitV13, V3UnitV13]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def V3Write_8c_1M0_1V01 : SchedWriteRes<[V3UnitM0, V3UnitV01]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 3 micro-op types
+
+def V3Write_1c_1SA_1D_1I : SchedWriteRes<[V3UnitSA, V3UnitD, V3UnitI]> {
+ let Latency = 1;
+ let NumMicroOps = 3;
+}
+
+def V3Write_2c_1SA_1V01_1I : SchedWriteRes<[V3UnitSA, V3UnitV01, V3UnitI]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+}
+
+def V3Write_2c_1SA_2V01 : SchedWriteRes<[V3UnitSA, V3UnitV01, V3UnitV01]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+}
+
+def V3Write_4c_1SA_2V01 : SchedWriteRes<[V3UnitSA, V3UnitV01, V3UnitV01]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+}
+
+def V3Write_9c_1L_2V : SchedWriteRes<[V3UnitL, V3UnitV, V3UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 3;
+}
+
+def V3Write_4c_3V : SchedWriteRes<[V3UnitV, V3UnitV, V3UnitV]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+}
+
+def V3Write_7c_1M_1M0_1V : SchedWriteRes<[V3UnitM, V3UnitM0, V3UnitV]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
+
+def V3Write_2c_1SA_1I_1V01 : SchedWriteRes<[V3UnitSA, V3UnitI, V3UnitV01]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+}
+
+def V3Write_6c_3L : SchedWriteRes<[V3UnitL, V3UnitL, V3UnitL]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+
+def V3Write_6c_3V : SchedWriteRes<[V3UnitV, V3UnitV, V3UnitV]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+
+def V3Write_8c_1L_2V : SchedWriteRes<[V3UnitL, V3UnitV, V3UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 4 micro-op types
+
+def V3Write_2c_1SA_2V01_1I : SchedWriteRes<[V3UnitSA, V3UnitV01, V3UnitV01,
+ V3UnitI]> {
+ let Latency = 2;
+ let NumMicroOps = 4;
+}
+
+def V3Write_2c_2SA_2V01 : SchedWriteRes<[V3UnitSA, V3UnitSA,
+ V3UnitV01, V3UnitV01]> {
+ let Latency = 2;
+ let NumMicroOps = 4;
+}
+
+def V3Write_4c_2SA_2V01 : SchedWriteRes<[V3UnitSA, V3UnitSA,
+ V3UnitV01, V3UnitV01]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
+
+def V3Write_5c_1I_3L : SchedWriteRes<[V3UnitI, V3UnitL, V3UnitL, V3UnitL]> {
+ let Latency = 5;
+ let NumMicroOps = 4;
+}
+
+def V3Write_6c_4V0 : SchedWriteRes<[V3UnitV0, V3UnitV0, V3UnitV0, V3UnitV0]> {
+ let Latency = 6;
+ let NumMicroOps = 4;
+}
+
+def V3Write_8c_4V : SchedWriteRes<[V3UnitV, V3UnitV, V3UnitV, V3UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+
+def V3Write_6c_2V_2V13 : SchedWriteRes<[V3UnitV, V3UnitV, V3UnitV13,
+ V3UnitV13]> {
+ let Latency = 6;
+ let NumMicroOps = 4;
+}
+
+def V3Write_8c_2V_2V13 : SchedWriteRes<[V3UnitV, V3UnitV, V3UnitV13,
+ V3UnitV13]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+
+def V3Write_6c_4V02 : SchedWriteRes<[V3UnitV02, V3UnitV02, V3UnitV02,
+ V3UnitV02]> {
+ let Latency = 6;
+ let NumMicroOps = 4;
+}
+
+def V3Write_6c_4V : SchedWriteRes<[V3UnitV, V3UnitV, V3UnitV, V3UnitV]> {
+ let Latency = 6;
+ let NumMicroOps = 4;
+}
+
+def V3Write_8c_2L_2V : SchedWriteRes<[V3UnitL, V3UnitL, V3UnitV, V3UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+
+def V3Write_9c_2L_2V : SchedWriteRes<[V3UnitL, V3UnitL, V3UnitV, V3UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 4;
+}
+
+def V3Write_2c_2SA_2V : SchedWriteRes<[V3UnitSA, V3UnitSA, V3UnitV,
+ V3UnitV]> {
+ let Latency = 2;
+ let NumMicroOps = 4;
+}
+
+def V3Write_4c_2SA_2V : SchedWriteRes<[V3UnitSA, V3UnitSA, V3UnitV,
+ V3UnitV]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
+
+def V3Write_8c_2M0_2V02 : SchedWriteRes<[V3UnitM0, V3UnitM0, V3UnitV02,
+ V3UnitV02]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+
+def V3Write_8c_2V_2V1 : SchedWriteRes<[V3UnitV, V3UnitV, V3UnitV1,
+ V3UnitV1]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+
+def V3Write_4c_2M0_2M : SchedWriteRes<[V3UnitM0, V3UnitM0, V3UnitM,
+ V3UnitM]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
+
+def V3Write_5c_2M0_2M : SchedWriteRes<[V3UnitM0, V3UnitM0, V3UnitM,
+ V3UnitM]> {
+ let Latency = 5;
+ let NumMicroOps = 4;
+}
+
+def V3Write_6c_2I_2L : SchedWriteRes<[V3UnitI, V3UnitI, V3UnitL, V3UnitL]> {
+ let Latency = 6;
+ let NumMicroOps = 4;
+}
+
+def V3Write_7c_4L : SchedWriteRes<[V3UnitL, V3UnitL, V3UnitL, V3UnitL]> {
+ let Latency = 7;
+ let NumMicroOps = 4;
+}
+
+def V3Write_6c_1SA_3V01 : SchedWriteRes<[V3UnitSA, V3UnitV01, V3UnitV01,
+ V3UnitV01]> {
+ let Latency = 6;
+ let NumMicroOps = 4;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 5 micro-op types
+
+def V3Write_2c_1SA_2V01_2I : SchedWriteRes<[V3UnitSA, V3UnitV01, V3UnitV01,
+ V3UnitI, V3UnitI]> {
+ let Latency = 2;
+ let NumMicroOps = 5;
+}
+
+def V3Write_8c_2L_3V : SchedWriteRes<[V3UnitL, V3UnitL, V3UnitV, V3UnitV,
+ V3UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 5;
+}
+
+def V3Write_9c_1L_4V : SchedWriteRes<[V3UnitL, V3UnitV, V3UnitV, V3UnitV,
+ V3UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 5;
+}
+
+def V3Write_10c_1L_4V : SchedWriteRes<[V3UnitL, V3UnitV, V3UnitV, V3UnitV,
+ V3UnitV]> {
+ let Latency = 10;
+ let NumMicroOps = 5;
+}
+
+def V3Write_6c_5V : SchedWriteRes<[V3UnitV, V3UnitV, V3UnitV, V3UnitV,
+ V3UnitV]> {
+ let Latency = 6;
+ let NumMicroOps = 5;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 6 micro-op types
+
+def V3Write_8c_3L_3V : SchedWriteRes<[V3UnitL, V3UnitL, V3UnitL,
+ V3UnitV, V3UnitV, V3UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 6;
+}
+
+def V3Write_9c_3L_3V : SchedWriteRes<[V3UnitL, V3UnitL, V3UnitL,
+ V3UnitV, V3UnitV, V3UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 6;
+}
+
+def V3Write_9c_2L_4V : SchedWriteRes<[V3UnitL, V3UnitL, V3UnitV,
+ V3UnitV, V3UnitV, V3UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 6;
+}
+
+def V3Write_9c_2L_2V_2I : SchedWriteRes<[V3UnitL, V3UnitL, V3UnitV,
+ V3UnitV, V3UnitI, V3UnitI]> {
+ let Latency = 9;
+ let NumMicroOps = 6;
+}
+
+def V3Write_9c_2V_4V13 : SchedWriteRes<[V3UnitV, V3UnitV, V3UnitV13,
+ V3UnitV13, V3UnitV13, V3UnitV13]> {
+ let Latency = 9;
+ let NumMicroOps = 6;
+}
+
+def V3Write_2c_3SA_3V : SchedWriteRes<[V3UnitSA, V3UnitSA, V3UnitSA,
+ V3UnitV, V3UnitV, V3UnitV]> {
+ let Latency = 2;
+ let NumMicroOps = 6;
+}
+
+def V3Write_4c_2SA_4V01 : SchedWriteRes<[V3UnitSA, V3UnitSA, V3UnitV01,
+ V3UnitV01, V3UnitV01, V3UnitV01]> {
+ let Latency = 4;
+ let NumMicroOps = 6;
+}
+
+def V3Write_5c_2SA_4V01 : SchedWriteRes<[V3UnitSA, V3UnitSA, V3UnitV01,
+ V3UnitV01, V3UnitV01, V3UnitV01]> {
+ let Latency = 5;
+ let NumMicroOps = 6;
+}
+
+def V3Write_2c_3SA_3V01 : SchedWriteRes<[V3UnitSA, V3UnitSA, V3UnitSA,
+ V3UnitV01, V3UnitV01, V3UnitV01]> {
+ let Latency = 2;
+ let NumMicroOps = 6;
+}
+
+def V3Write_4c_2SA_2I_2V01 : SchedWriteRes<[V3UnitSA, V3UnitSA, V3UnitI,
+ V3UnitI, V3UnitV01, V3UnitV01]> {
+ let Latency = 4;
+ let NumMicroOps = 6;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 7 micro-op types
+
+def V3Write_8c_3L_4V : SchedWriteRes<[V3UnitL, V3UnitL, V3UnitL,
+ V3UnitV, V3UnitV, V3UnitV, V3UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 7;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 8 micro-op types
+
+def V3Write_2c_4SA_4V : SchedWriteRes<[V3UnitSA, V3UnitSA, V3UnitSA,
+ V3UnitSA, V3UnitV, V3UnitV, V3UnitV,
+ V3UnitV]> {
+ let Latency = 2;
+ let NumMicroOps = 8;
+}
+
+def V3Write_2c_4SA_4V01 : SchedWriteRes<[V3UnitSA, V3UnitSA, V3UnitSA,
+ V3UnitSA, V3UnitV01, V3UnitV01,
+ V3UnitV01, V3UnitV01]> {
+ let Latency = 2;
+ let NumMicroOps = 8;
+}
+
+def V3Write_6c_2SA_6V01 : SchedWriteRes<[V3UnitSA, V3UnitSA, V3UnitV01,
+ V3UnitV01, V3UnitV01, V3UnitV01,
+ V3UnitV01, V3UnitV01]> {
+ let Latency = 6;
+ let NumMicroOps = 8;
+}
+
+def V3Write_8c_4L_4V : SchedWriteRes<[V3UnitL, V3UnitL, V3UnitL, V3UnitL,
+ V3UnitV, V3UnitV, V3UnitV, V3UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 8;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 9 micro-op types
+
+def V3Write_6c_3SA_6V01 : SchedWriteRes<[V3UnitSA, V3UnitSA, V3UnitSA,
+ V3UnitV01, V3UnitV01, V3UnitV01,
+ V3UnitV01, V3UnitV01, V3UnitV01]> {
+ let Latency = 6;
+ let NumMicroOps = 9;
+}
+
+def V3Write_10c_1L_8V : SchedWriteRes<[V3UnitL, V3UnitV, V3UnitV, V3UnitV,
+ V3UnitV, V3UnitV, V3UnitV, V3UnitV,
+ V3UnitV]> {
+ let Latency = 10;
+ let NumMicroOps = 9;
+}
+
+def V3Write_10c_3V_3L_3I : SchedWriteRes<[V3UnitV, V3UnitV, V3UnitV,
+ V3UnitL, V3UnitL, V3UnitL,
+ V3UnitI, V3UnitI, V3UnitI]> {
+ let Latency = 10;
+ let NumMicroOps = 9;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 10 micro-op types
+
+def V3Write_9c_6L_4V : SchedWriteRes<[V3UnitL, V3UnitL, V3UnitL, V3UnitL,
+ V3UnitL, V3UnitL, V3UnitV, V3UnitV,
+ V3UnitV, V3UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 10;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 12 micro-op types
+
+def V3Write_5c_4SA_8V01 : SchedWriteRes<[V3UnitSA, V3UnitSA, V3UnitSA,
+ V3UnitSA, V3UnitV01, V3UnitV01,
+ V3UnitV01, V3UnitV01, V3UnitV01,
+ V3UnitV01, V3UnitV01, V3UnitV01]> {
+ let Latency = 5;
+ let NumMicroOps = 12;
+}
+
+def V3Write_9c_4L_8V : SchedWriteRes<[V3UnitL, V3UnitL, V3UnitL,
+ V3UnitL, V3UnitV, V3UnitV,
+ V3UnitV, V3UnitV, V3UnitV,
+ V3UnitV, V3UnitV, V3UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 12;
+}
+
+def V3Write_10c_4L_8V : SchedWriteRes<[V3UnitL, V3UnitL, V3UnitL,
+ V3UnitL, V3UnitV, V3UnitV,
+ V3UnitV, V3UnitV, V3UnitV,
+ V3UnitV, V3UnitV, V3UnitV]> {
+ let Latency = 10;
+ let NumMicroOps = 12;
+}
+
+def V3Write_4c_6SA_6V01 : SchedWriteRes<[V3UnitSA, V3UnitSA, V3UnitSA,
+ V3UnitSA, V3UnitSA, V3UnitSA,
+ V3UnitV01, V3UnitV01, V3UnitV01,
+ V3UnitV01, V3UnitV01, V3UnitV01]> {
+ let Latency = 4;
+ let NumMicroOps = 12;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 16 micro-op types
+
+def V3Write_7c_4SA_12V01 : SchedWriteRes<[V3UnitSA, V3UnitSA, V3UnitSA,
+ V3UnitSA, V3UnitV01, V3UnitV01, // 4th and last SA, then V01 entries 1-2
+ V3UnitV01, V3UnitV01, V3UnitV01,
+ V3UnitV01, V3UnitV01, V3UnitV01,
+ V3UnitV01, V3UnitV01, V3UnitV01,
+ V3UnitV01]> { // 12th V01 entry
+ let Latency = 7;
+ let NumMicroOps = 16; // 4 SA + 12 V01
+}
+
+def V3Write_10c_4L_8V_4I : SchedWriteRes<[V3UnitL, V3UnitL, V3UnitL,
+ V3UnitL, V3UnitV, V3UnitV, // 4th and last L, then V entries 1-2
+ V3UnitV, V3UnitV, V3UnitV,
+ V3UnitV, V3UnitV, V3UnitV, // V entries 6-8
+ V3UnitI, V3UnitI, V3UnitI,
+ V3UnitI]> { // 4th I entry
+ let Latency = 10;
+ let NumMicroOps = 16; // 4 L + 8 V + 4 I
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 18 micro-op types
+
+def V3Write_7c_9SA_9V01 : SchedWriteRes<[V3UnitSA, V3UnitSA, V3UnitSA,
+ V3UnitSA, V3UnitSA, V3UnitSA,
+ V3UnitSA, V3UnitSA, V3UnitSA, // SA entries 7-9
+ V3UnitV01, V3UnitV01, V3UnitV01,
+ V3UnitV01, V3UnitV01, V3UnitV01,
+ V3UnitV01, V3UnitV01, V3UnitV01]> { // V01 entries 7-9
+ let Latency = 7;
+ let NumMicroOps = 18; // 9 SA + 9 V01
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 27 micro-op types
+
+def V3Write_7c_9SA_9I_9V01 : SchedWriteRes<[V3UnitSA, V3UnitSA, V3UnitSA,
+ V3UnitSA, V3UnitSA, V3UnitSA,
+ V3UnitSA, V3UnitSA, V3UnitSA, // SA entries 7-9
+ V3UnitI, V3UnitI, V3UnitI,
+ V3UnitI, V3UnitI, V3UnitI,
+ V3UnitI, V3UnitI, V3UnitI, // I entries 7-9
+ V3UnitV01, V3UnitV01, V3UnitV01,
+ V3UnitV01, V3UnitV01, V3UnitV01,
+ V3UnitV01, V3UnitV01,
+ V3UnitV01]> { // 9th V01 entry
+ let Latency = 7;
+ let NumMicroOps = 27; // 9 SA + 9 I + 9 V01
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 36 micro-op types
+
+def V3Write_11c_18SA_18V01 : SchedWriteRes<[V3UnitSA, V3UnitSA, V3UnitSA,
+ V3UnitSA, V3UnitSA, V3UnitSA,
+ V3UnitSA, V3UnitSA, V3UnitSA,
+ V3UnitSA, V3UnitSA, V3UnitSA,
+ V3UnitSA, V3UnitSA, V3UnitSA,
+ V3UnitSA, V3UnitSA, V3UnitSA, // SA entries 16-18
+ V3UnitV01, V3UnitV01, V3UnitV01,
+ V3UnitV01, V3UnitV01, V3UnitV01,
+ V3UnitV01, V3UnitV01, V3UnitV01,
+ V3UnitV01, V3UnitV01, V3UnitV01,
+ V3UnitV01, V3UnitV01, V3UnitV01,
+ V3UnitV01, V3UnitV01,
+ V3UnitV01]> { // 18th V01 entry
+ let Latency = 11;
+ let NumMicroOps = 36; // 18 SA + 18 V01
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 54 micro-op types
+
+def V3Write_11c_18SA_18I_18V01 : SchedWriteRes<[V3UnitSA, V3UnitSA,
+ V3UnitSA, V3UnitSA,
+ V3UnitSA, V3UnitSA,
+ V3UnitSA, V3UnitSA,
+ V3UnitSA, V3UnitSA,
+ V3UnitSA, V3UnitSA,
+ V3UnitSA, V3UnitSA,
+ V3UnitSA, V3UnitSA,
+ V3UnitSA, V3UnitSA, // SA entries 17-18
+ V3UnitI, V3UnitI, V3UnitI,
+ V3UnitI, V3UnitI, V3UnitI,
+ V3UnitI, V3UnitI, V3UnitI,
+ V3UnitI, V3UnitI, V3UnitI,
+ V3UnitI, V3UnitI, V3UnitI,
+ V3UnitI, V3UnitI, V3UnitI, // I entries 16-18
+ V3UnitV01, V3UnitV01,
+ V3UnitV01, V3UnitV01,
+ V3UnitV01, V3UnitV01,
+ V3UnitV01, V3UnitV01,
+ V3UnitV01, V3UnitV01,
+ V3UnitV01, V3UnitV01,
+ V3UnitV01, V3UnitV01,
+ V3UnitV01, V3UnitV01,
+ V3UnitV01, V3UnitV01]> { // V01 entries 17-18
+ let Latency = 11;
+ let NumMicroOps = 54; // 18 SA + 18 I + 18 V01
+}
+
+//===----------------------------------------------------------------------===//
+// Define predicate-controlled types
+
+// Every variant below lists one predicated SchedVar followed by a NoSchedPred
+// SchedVar; the NoSchedPred entry is the fallback used when the predicate does
+// not match. The predicates (IsCheapLSL, NeoverseNoLSL, ...) are defined
+// outside this section.
+
+// Shifted-register arithmetic: V3Write_1c_1I when IsCheapLSL matches,
+// otherwise V3Write_2c_1M.
+def V3Write_ArithI : SchedWriteVariant<[
+ SchedVar<IsCheapLSL, [V3Write_1c_1I]>,
+ SchedVar<NoSchedPred, [V3Write_2c_1M]>]>;
+
+// Flag-setting counterpart of V3Write_ArithI (both alternatives carry _1Flg).
+def V3Write_ArithF : SchedWriteVariant<[
+ SchedVar<IsCheapLSL, [V3Write_1c_1F_1Flg]>,
+ SchedVar<NoSchedPred, [V3Write_2c_1M_1Flg]>]>;
+
+// Same two alternatives as V3Write_ArithF, selected by NeoverseNoLSL instead.
+def V3Write_Logical : SchedWriteVariant<[
+ SchedVar<NeoverseNoLSL, [V3Write_1c_1F_1Flg]>,
+ SchedVar<NoSchedPred, [V3Write_2c_1M_1Flg]>]>;
+
+// Extract: V3Write_1c_1I when IsRORImmIdiomPred matches, otherwise
+// V3Write_3c_1I_1M.
+def V3Write_Extr : SchedWriteVariant<[
+ SchedVar<IsRORImmIdiomPred, [V3Write_1c_1I]>,
+ SchedVar<NoSchedPred, [V3Write_3c_1I_1M]>]>;
+
+// Register-offset vector load: the NeoverseHQForm alternative adds an I
+// resource and one cycle relative to the default.
+def V3Write_LdrHQ : SchedWriteVariant<[
+ SchedVar<NeoverseHQForm, [V3Write_7c_1I_1L]>,
+ SchedVar<NoSchedPred, [V3Write_6c_1L]>]>;
+
+// Register-offset vector store: the NeoverseHQForm alternative adds an I
+// resource relative to the default.
+def V3Write_StrHQ : SchedWriteVariant<[
+ SchedVar<NeoverseHQForm, [V3Write_2c_1SA_1V01_1I]>,
+ SchedVar<NoSchedPred, [V3Write_2c_1SA_1V01]>]>;
+
+// The three "0orN" variants select V3Write_0c when NeoverseZeroMove matches
+// and the named N-cycle write otherwise.
+def V3Write_0or1c_1I : SchedWriteVariant<[
+ SchedVar<NeoverseZeroMove, [V3Write_0c]>,
+ SchedVar<NoSchedPred, [V3Write_1c_1I]>]>;
+
+def V3Write_0or2c_1V : SchedWriteVariant<[
+ SchedVar<NeoverseZeroMove, [V3Write_0c]>,
+ SchedVar<NoSchedPred, [V3Write_2c_1V]>]>;
+
+def V3Write_0or3c_1M0 : SchedWriteVariant<[
+ SchedVar<NeoverseZeroMove, [V3Write_0c]>,
+ SchedVar<NoSchedPred, [V3Write_3c_1M0]>]>;
+
+// The "MorNc" variants below select the slower (N-cycle) write when
+// NeoversePdIsPg matches and the faster (M-cycle) write otherwise.
+def V3Write_2or3c_1M : SchedWriteVariant<[
+ SchedVar<NeoversePdIsPg, [V3Write_3c_1M]>,
+ SchedVar<NoSchedPred, [V3Write_2c_1M]>]>;
+
+def V3Write_1or2c_1M : SchedWriteVariant<[
+ SchedVar<NeoversePdIsPg, [V3Write_2c_1M]>,
+ SchedVar<NoSchedPred, [V3Write_1c_1M]>]>;
+
+def V3Write_3or4c_1M0_1M : SchedWriteVariant<[
+ SchedVar<NeoversePdIsPg, [V3Write_4c_1M0_1M]>,
+ SchedVar<NoSchedPred, [V3Write_3c_1M0_1M]>]>;
+
+def V3Write_2or3c_1V0 : SchedWriteVariant<[
+ SchedVar<NeoversePdIsPg, [V3Write_3c_1V0]>,
+ SchedVar<NoSchedPred, [V3Write_2c_1V0]>]>;
+
+def V3Write_2or3c_1V0_1M : SchedWriteVariant<[
+ SchedVar<NeoversePdIsPg, [V3Write_3c_1V0_1M]>,
+ SchedVar<NoSchedPred, [V3Write_2c_1V0_1M]>]>;
+
+// Increment/decrement: V3Write_1c_1I when NeoverseCheapIncDec matches,
+// otherwise V3Write_2c_1M.
+def V3Write_IncDec : SchedWriteVariant<[
+ SchedVar<NeoverseCheapIncDec, [V3Write_1c_1I]>,
+ SchedVar<NoSchedPred, [V3Write_2c_1M]>]>;
+
+//===----------------------------------------------------------------------===//
+// Define forwarded types
+
+// Most V3Wr_* writes below are paired with a V3Rd_* SchedReadAdvance. A read
+// advance of N cycles lets an operand produced by one of the listed writes be
+// consumed N cycles early, i.e. it shortens that producer's effective latency
+// for the operand the read is attached to.
+
+// NOTE: SOG, p. 16, n. 2: Accumulator forwarding is not supported for
+// consumers of 64 bit multiply high operations?
+// V3Wr_IM has no matching V3Rd_IM read advance in this section.
+def V3Wr_IM : SchedWriteRes<[V3UnitM]> { let Latency = 2; }
+
+// V3Rd_FMA also advances operands produced by the generic WriteFMul class.
+def V3Wr_FMA : SchedWriteRes<[V3UnitV]> { let Latency = 4; }
+def V3Rd_FMA : SchedReadAdvance<2, [WriteFMul, V3Wr_FMA]>;
+
+def V3Wr_VA : SchedWriteRes<[V3UnitV]> { let Latency = 4; }
+def V3Rd_VA : SchedReadAdvance<3, [V3Wr_VA]>;
+
+def V3Wr_VDOT : SchedWriteRes<[V3UnitV]> { let Latency = 3; }
+def V3Rd_VDOT : SchedReadAdvance<2, [V3Wr_VDOT]>;
+
+def V3Wr_VMMA : SchedWriteRes<[V3UnitV]> { let Latency = 3; }
+def V3Rd_VMMA : SchedReadAdvance<2, [V3Wr_VMMA]>;
+
+def V3Wr_VMA : SchedWriteRes<[V3UnitV02]> { let Latency = 4; }
+def V3Rd_VMA : SchedReadAdvance<3, [V3Wr_VMA]>;
+
+// NOTE(review): V3Wr_VMAH lists V3UnitV02 twice but leaves NumMicroOps at its
+// default, whereas the generic multi-unit V3Write_* types set NumMicroOps
+// explicitly - confirm this is intended.
+def V3Wr_VMAH : SchedWriteRes<[V3UnitV02, V3UnitV02]> { let Latency = 4; }
+def V3Rd_VMAH : SchedReadAdvance<2, [V3Wr_VMAH]>;
+
+def V3Wr_VMAL : SchedWriteRes<[V3UnitV02]> { let Latency = 4; }
+def V3Rd_VMAL : SchedReadAdvance<3, [V3Wr_VMAL]>;
+
+def V3Wr_VPA : SchedWriteRes<[V3UnitV]> { let Latency = 4; }
+def V3Rd_VPA : SchedReadAdvance<3, [V3Wr_VPA]>;
+
+def V3Wr_VSA : SchedWriteRes<[V3UnitV]> { let Latency = 4; }
+def V3Rd_VSA : SchedReadAdvance<3, [V3Wr_VSA]>;
+
+def V3Wr_VFCMA : SchedWriteRes<[V3UnitV]> { let Latency = 4; }
+def V3Rd_VFCMA : SchedReadAdvance<2, [V3Wr_VFCMA]>;
+
+// V3Rd_VFMA advances operands produced by either the multiply (V3Wr_VFM) or
+// the multiply-accumulate (V3Wr_VFMA) write.
+def V3Wr_VFM : SchedWriteRes<[V3UnitV]> { let Latency = 3; }
+def V3Wr_VFMA : SchedWriteRes<[V3UnitV]> { let Latency = 4; }
+def V3Rd_VFMA : SchedReadAdvance<2, [V3Wr_VFM, V3Wr_VFMA]>;
+
+def V3Wr_VFMAL : SchedWriteRes<[V3UnitV]> { let Latency = 4; }
+def V3Rd_VFMAL : SchedReadAdvance<2, [V3Wr_VFMAL]>;
+
+def V3Wr_VBFDOT : SchedWriteRes<[V3UnitV]> { let Latency = 5; }
+def V3Rd_VBFDOT : SchedReadAdvance<2, [V3Wr_VBFDOT]>;
+def V3Wr_VBFMMA : SchedWriteRes<[V3UnitV]> { let Latency = 6; }
+def V3Rd_VBFMMA : SchedReadAdvance<2, [V3Wr_VBFMMA]>;
+def V3Wr_VBFMAL : SchedWriteRes<[V3UnitV]> { let Latency = 5; }
+def V3Rd_VBFMAL : SchedReadAdvance<3, [V3Wr_VBFMAL]>;
+
+def V3Wr_CRC : SchedWriteRes<[V3UnitM0]> { let Latency = 2; }
+def V3Rd_CRC : SchedReadAdvance<1, [V3Wr_CRC]>;
+
+// Z-prefixed forwarded types. Each V3Wr_Z* write is paired with a V3Rd_Z*
+// SchedReadAdvance that lets an operand produced by the listed write(s) be
+// consumed the given number of cycles early.
+def V3Wr_ZA : SchedWriteRes<[V3UnitV]> { let Latency = 4; }
+def V3Rd_ZA : SchedReadAdvance<3, [V3Wr_ZA]>;
+def V3Wr_ZPA : SchedWriteRes<[V3UnitV]> { let Latency = 4; }
+def V3Rd_ZPA : SchedReadAdvance<3, [V3Wr_ZPA]>;
+def V3Wr_ZSA : SchedWriteRes<[V3UnitV13]> { let Latency = 4; }
+def V3Rd_ZSA : SchedReadAdvance<3, [V3Wr_ZSA]>;
+
+def V3Wr_ZDOTB : SchedWriteRes<[V3UnitV]> { let Latency = 3; }
+def V3Rd_ZDOTB : SchedReadAdvance<2, [V3Wr_ZDOTB]>;
+def V3Wr_ZDOTH : SchedWriteRes<[V3UnitV02]> { let Latency = 3; }
+def V3Rd_ZDOTH : SchedReadAdvance<2, [V3Wr_ZDOTH]>;
+
+// NOTE: SOG p. 43: Complex multiply-add B, H, S element size: How to reduce
+// throughput to 1 in case of forwarding?
+// NOTE(review): V3Wr_ZCMAD, V3Wr_ZMAD and V3Wr_ZMASQD each list V3UnitV02
+// twice but leave NumMicroOps at its default - confirm this is intended.
+def V3Wr_ZCMABHS : SchedWriteRes<[V3UnitV02]> { let Latency = 4; }
+def V3Rd_ZCMABHS : SchedReadAdvance<3, [V3Wr_ZCMABHS]>;
+def V3Wr_ZCMAD : SchedWriteRes<[V3UnitV02, V3UnitV02]> { let Latency = 5; }
+def V3Rd_ZCMAD : SchedReadAdvance<2, [V3Wr_ZCMAD]>;
+
+def V3Wr_ZMMA : SchedWriteRes<[V3UnitV]> { let Latency = 3; }
+def V3Rd_ZMMA : SchedReadAdvance<2, [V3Wr_ZMMA]>;
+
+def V3Wr_ZMABHS : SchedWriteRes<[V3UnitV02]> { let Latency = 4; }
+def V3Rd_ZMABHS : SchedReadAdvance<3, [V3Wr_ZMABHS]>;
+def V3Wr_ZMAD : SchedWriteRes<[V3UnitV02, V3UnitV02]> { let Latency = 5; }
+def V3Rd_ZMAD : SchedReadAdvance<2, [V3Wr_ZMAD]>;
+
+def V3Wr_ZMAL : SchedWriteRes<[V3UnitV02]> { let Latency = 4; }
+def V3Rd_ZMAL : SchedReadAdvance<3, [V3Wr_ZMAL]>;
+
+// A single read advance (V3Rd_ZMASQ) is shared by the three ZMASQ* writes.
+def V3Wr_ZMASQL : SchedWriteRes<[V3UnitV02]> { let Latency = 4; }
+def V3Wr_ZMASQBHS : SchedWriteRes<[V3UnitV02]> { let Latency = 4; }
+def V3Wr_ZMASQD : SchedWriteRes<[V3UnitV02, V3UnitV02]> { let Latency = 5; }
+def V3Rd_ZMASQ : SchedReadAdvance<2, [V3Wr_ZMASQL, V3Wr_ZMASQBHS,
+ V3Wr_ZMASQD]>;
+
+def V3Wr_ZFCMA : SchedWriteRes<[V3UnitV]> { let Latency = 5; }
+def V3Rd_ZFCMA : SchedReadAdvance<3, [V3Wr_ZFCMA]>;
+
+def V3Wr_ZFMA : SchedWriteRes<[V3UnitV]> { let Latency = 4; }
+def V3Rd_ZFMA : SchedReadAdvance<2, [V3Wr_ZFMA]>;
+
+def V3Wr_ZFMAL : SchedWriteRes<[V3UnitV]> { let Latency = 4; }
+def V3Rd_ZFMAL : SchedReadAdvance<2, [V3Wr_ZFMAL]>;
+
+def V3Wr_ZBFDOT : SchedWriteRes<[V3UnitV]> { let Latency = 5; }
+def V3Rd_ZBFDOT : SchedReadAdvance<2, [V3Wr_ZBFDOT]>;
+def V3Wr_ZBFMMA : SchedWriteRes<[V3UnitV]> { let Latency = 6; }
+def V3Rd_ZBFMMA : SchedReadAdvance<2, [V3Wr_ZBFMMA]>;
+def V3Wr_ZBFMAL : SchedWriteRes<[V3UnitV]> { let Latency = 5; }
+def V3Rd_ZBFMAL : SchedReadAdvance<3, [V3Wr_ZBFMAL]>;
+
+//===----------------------------------------------------------------------===//
+// Define types with long resource cycles (rc)
+
+// Each write below occupies V3UnitV1 for more than one cycle: Latency is the
+// "<N>c" part of the name and ReleaseAtCycles is the "<N>rc" part.
+def V3Write_6c_1V1_5rc : SchedWriteRes<[V3UnitV1]> {
+  let Latency = 6;
+  let ReleaseAtCycles = [5];
+}
+def V3Write_9c_1V1_2rc : SchedWriteRes<[V3UnitV1]> {
+  let Latency = 9;
+  let ReleaseAtCycles = [2];
+}
+def V3Write_9c_1V1_4rc : SchedWriteRes<[V3UnitV1]> {
+  let Latency = 9;
+  let ReleaseAtCycles = [4];
+}
+def V3Write_10c_1V1_9rc : SchedWriteRes<[V3UnitV1]> {
+  let Latency = 10;
+  let ReleaseAtCycles = [9];
+}
+def V3Write_11c_1V1_4rc : SchedWriteRes<[V3UnitV1]> {
+  let Latency = 11;
+  let ReleaseAtCycles = [4];
+}
+def V3Write_13c_1V1_8rc : SchedWriteRes<[V3UnitV1]> {
+  let Latency = 13;
+  let ReleaseAtCycles = [8];
+}
+def V3Write_14c_1V1_2rc : SchedWriteRes<[V3UnitV1]> {
+  let Latency = 14;
+  let ReleaseAtCycles = [2];
+}
+
+// Miscellaneous
+// -----------------------------------------------------------------------------
+
+// COPY is given the generic WriteI class rather than a V3-specific write.
+def : InstRW<[WriteI], (instrs COPY)>;
+
+// §3.3 Branch instructions
+// -----------------------------------------------------------------------------
+
+// Branch, immed
+// Compare and branch
+def : SchedAlias<WriteBr, V3Write_1c_1B>;
+
+// Branch, register
+def : SchedAlias<WriteBrReg, V3Write_1c_1B>;
+
+// Branch and link, immed
+// Branch and link, register
+// BL and BLR are overridden per instruction so they get V3Write_1c_1B_1S
+// instead of the V3Write_1c_1B used for the generic branch classes above.
+def : InstRW<[V3Write_1c_1B_1S], (instrs BL, BLR)>;
+
+// §3.4 Arithmetic and Logical Instructions
+// -----------------------------------------------------------------------------
+
+// ALU, basic
+def : SchedAlias<WriteI, V3Write_1c_1I>;
+
+// ALU, basic, flagset
+def : InstRW<[V3Write_1c_1F_1Flg],
+ (instregex "^(ADD|SUB)S[WX]r[ir]$",
+ "^(ADC|SBC)S[WX]r$",
+ "^ANDS[WX]ri$",
+ "^(AND|BIC)S[WX]rr$")>;
+// MOVZ (not a flag-setting op): uses the V3Write_0or1c_1I variant, which
+// selects V3Write_0c when NeoverseZeroMove matches.
+def : InstRW<[V3Write_0or1c_1I], (instregex "^MOVZ[WX]i$")>;
+
+// ALU, extend and shift
+def : SchedAlias<WriteIEReg, V3Write_2c_1M>;
+
+// Arithmetic, LSL shift, shift <= 4
+// Arithmetic, flagset, LSL shift, shift <= 4
+// Arithmetic, LSR/ASR/ROR shift or LSL shift > 4
+// The three rows above are told apart at scheduling time by the
+// V3Write_ArithI / V3Write_ArithF variants.
+def : SchedAlias<WriteISReg, V3Write_ArithI>;
+def : InstRW<[V3Write_ArithF],
+ (instregex "^(ADD|SUB)S[WX]rs$")>;
+
+// Arithmetic, immediate to logical address tag
+def : InstRW<[V3Write_2c_1M], (instrs ADDG, SUBG)>;
+
+// Conditional compare
+def : InstRW<[V3Write_1c_1F_1Flg], (instregex "^CCM[NP][WX][ir]")>;
+
+// Convert floating-point condition flags
+// Flag manipulation instructions
+// WriteSys is given a 1-cycle latency and an empty resource list.
+def : WriteRes<WriteSys, []> { let Latency = 1; }
+
+// Insert Random Tags
+def : InstRW<[V3Write_2c_1M], (instrs IRG, IRGstack)>;
+
+// Insert Tag Mask
+// Subtract Pointer
+def : InstRW<[V3Write_1c_1I], (instrs GMI, SUBP)>;
+
+// Subtract Pointer, flagset
+def : InstRW<[V3Write_1c_1F_1Flg], (instrs SUBPS)>;
+
+// Logical, shift, no flagset
+def : InstRW<[V3Write_1c_1I], (instregex "^(AND|BIC|EON|EOR|ORN)[WX]rs$")>;
+// ORR is listed separately so it can use the zero-move variant
+// (V3Write_0c when NeoverseZeroMove matches, V3Write_1c_1I otherwise).
+def : InstRW<[V3Write_0or1c_1I], (instregex "^ORR[WX]rs$")>;
+
+// Logical, shift, flagset
+def : InstRW<[V3Write_Logical], (instregex "^(AND|BIC)S[WX]rs$")>;
+
+// Move and shift instructions
+// -----------------------------------------------------------------------------
+
+def : SchedAlias<WriteImm, V3Write_1c_1I>;
+
+// §3.5 Divide and multiply instructions
+// -----------------------------------------------------------------------------
+
+// SDIV, UDIV
+def : SchedAlias<WriteID32, V3Write_12c_1M0>;
+def : SchedAlias<WriteID64, V3Write_20c_1M0>;
+
+// The generic 32- and 64-bit multiply classes share one 2-cycle M write; the
+// InstRW entries below override specific opcodes.
+def : SchedAlias<WriteIM32, V3Write_2c_1M>;
+def : SchedAlias<WriteIM64, V3Write_2c_1M>;
+
+// Multiply
+// Multiply accumulate, W-form
+// Multiply accumulate, X-form
+def : InstRW<[V3Wr_IM], (instregex "^M(ADD|SUB)[WX]rrr$")>;
+
+// Multiply accumulate long
+// Multiply long
+def : InstRW<[V3Wr_IM], (instregex "^(S|U)M(ADD|SUB)Lrrr$")>;
+
+// Multiply high
+def : InstRW<[V3Write_3c_1M], (instrs SMULHrr, UMULHrr)>;
+
+// §3.6 Pointer Authentication Instructions (v8.3 PAC)
+// -----------------------------------------------------------------------------
+
+// Authenticate data address
+// Authenticate instruction address
+// Compute pointer authentication code for data address
+// Compute pointer authentication code, using generic key
+// Compute pointer authentication code for instruction address
+// Both patterns are prefix-only (no trailing '$'), so every opcode whose name
+// starts with AUT or PAC is covered.
+def : InstRW<[V3Write_4c_1M0], (instregex "^AUT", "^PAC")>;
+
+// Branch and link, register, with pointer authentication
+// Branch, register, with pointer authentication
+// Branch, return, with pointer authentication
+def : InstRW<[V3Write_6c_1M0_1B], (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ, BRAA,
+ BRAAZ, BRAB, BRABZ, RETAA, RETAB,
+ ERETAA, ERETAB)>;
+
+
+// Load register, with pointer authentication
+def : InstRW<[V3Write_9c_1M0_1L], (instregex "^LDRA[AB](indexed|writeback)")>;
+
+// Strip pointer authentication code
+def : InstRW<[V3Write_2c_1M0], (instrs XPACD, XPACI, XPACLRI)>;
+
+// §3.7 Miscellaneous data-processing instructions
+// -----------------------------------------------------------------------------
+
+// Address generation
+def : InstRW<[V3Write_1c_1I], (instrs ADR, ADRP)>;
+
+// Bitfield extract, one reg
+// Bitfield extract, two regs
+// The generic WriteExtr class and the two EXTR opcodes are both mapped to the
+// same V3Write_Extr variant.
+def : SchedAlias<WriteExtr, V3Write_Extr>;
+def : InstRW<[V3Write_Extr], (instrs EXTRWrri, EXTRXrri)>;
+
+// Bitfield move, basic
+def : SchedAlias<WriteIS, V3Write_1c_1I>;
+
+// Bitfield move, insert
+def : InstRW<[V3Write_2c_1M], (instregex "^BFM[WX]ri$")>;
+
+// §3.8 Load instructions
+// -----------------------------------------------------------------------------
+
+// NOTE: SOG p. 19: Throughput of LDN?P X-form should be 2, but reported as 3.
+
+// Generic load classes; the InstRW entries below override specific opcodes.
+def : SchedAlias<WriteLD, V3Write_4c_1L>;
+def : SchedAlias<WriteLDIdx, V3Write_4c_1L>;
+
+// Load register, literal
+// (PRFMl is listed here together with the literal loads.)
+def : InstRW<[V3Write_5c_1L_1I], (instrs LDRWl, LDRXl, LDRSWl, PRFMl)>;
+
+// Load pair, signed immed offset, signed words
+def : InstRW<[V3Write_5c_1I_3L, WriteLDHi], (instrs LDPSWi)>;
+
+// Load pair, immed post-index or immed pre-index, signed words
+// Same writes as LDPSWi with WriteAdr listed first (presumably for the
+// written-back base register - confirm against the instruction definition).
+def : InstRW<[WriteAdr, V3Write_5c_1I_3L, WriteLDHi],
+ (instregex "^LDPSW(post|pre)$")>;
+
+// §3.9 Store instructions
+// -----------------------------------------------------------------------------
+
+// NOTE: SOG, p. 20: Unsure if STRH uses pipeline I.
+
+// All three generic store classes share V3Write_1c_1SA_1D; address writeback
+// (WriteAdr) is a 1-cycle I write.
+def : SchedAlias<WriteST, V3Write_1c_1SA_1D>;
+def : SchedAlias<WriteSTIdx, V3Write_1c_1SA_1D>;
+def : SchedAlias<WriteSTP, V3Write_1c_1SA_1D>;
+def : SchedAlias<WriteAdr, V3Write_1c_1I>;
+
+// §3.10 Tag load instructions
+// -----------------------------------------------------------------------------
+
+// Load allocation tag
+// Load multiple allocation tags
+def : InstRW<[V3Write_4c_1L], (instrs LDG, LDGM)>;
+
+// §3.11 Tag store instructions
+// -----------------------------------------------------------------------------
+
+// Store allocation tags to one or two granules, post-index
+// Store allocation tags to one or two granules, pre-index
+// Store allocation tag to one or two granules, zeroing, post-index
+// Store Allocation Tag to one or two granules, zeroing, pre-index
+// Store allocation tag and reg pair to memory, post-Index
+// Store allocation tag and reg pair to memory, pre-Index
+// The pre/post-index forms use the same write as the signed-offset forms
+// below plus one I resource.
+def : InstRW<[V3Write_1c_1SA_1D_1I], (instrs STGPreIndex, STGPostIndex,
+ ST2GPreIndex, ST2GPostIndex,
+ STZGPreIndex, STZGPostIndex,
+ STZ2GPreIndex, STZ2GPostIndex,
+ STGPpre, STGPpost)>;
+
+// Store allocation tags to one or two granules, signed offset
+// Store allocation tag to one or two granules, zeroing, signed offset
+// Store allocation tag and reg pair to memory, signed offset
+// Store multiple allocation tags
+def : InstRW<[V3Write_1c_1SA_1D], (instrs STGi, ST2Gi, STZGi,
+ STZ2Gi, STGPi, STGM, STZGM)>;
+
+// §3.12 FP data processing instructions
+// -----------------------------------------------------------------------------
+
+// FP absolute value
+// FP arithmetic
+// FP min/max
+// FP negate
+// FP select
+def : SchedAlias<WriteF, V3Write_2c_1V>;
+
+// FP compare
+def : SchedAlias<WriteFCmp, V3Write_2c_1V0>;
+
+// FP divide, square root
+// Default for the generic WriteFDiv class; the per-form InstRW entries below
+// give the H-, S- and D-form divide and square root their own writes.
+def : SchedAlias<WriteFDiv, V3Write_6c_1V1>;
+
+// FP divide, H-form
+def : InstRW<[V3Write_6c_1V1], (instrs FDIVHrr)>;
+// FP divide, S-form
+def : InstRW<[V3Write_8c_1V1], (instrs FDIVSrr)>;
+// FP divide, D-form
+def : InstRW<[V3Write_13c_1V1], (instrs FDIVDrr)>;
+
+// FP square root, H-form
+def : InstRW<[V3Write_6c_1V1], (instrs FSQRTHr)>;
+// FP square root, S-form
+def : InstRW<[V3Write_8c_1V1], (instrs FSQRTSr)>;
+// FP square root, D-form
+def : InstRW<[V3Write_13c_1V1], (instrs FSQRTDr)>;
+
+// FP multiply
+def : WriteRes<WriteFMul, [V3UnitV]> { let Latency = 3; }
+
+// FP multiply accumulate
+// The first two reads are ReadDefault; only the third read gets the V3Rd_FMA
+// advance (presumably the accumulator operand - confirm against the
+// instruction's operand order).
+def : InstRW<[V3Wr_FMA, ReadDefault, ReadDefault, V3Rd_FMA],
+ (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>;
+
+// FP round to integral
+def : InstRW<[V3Write_3c_1V02], (instregex "^FRINT[AIMNPXZ][HSD]r$",
+ "^FRINT(32|64)[XZ][SD]r$")>;
+
+// §3.13 FP miscellaneous instructions
+// -----------------------------------------------------------------------------
+
+// FP convert, from gen to vec reg
+def : InstRW<[V3Write_3c_1M0], (instregex "^[SU]CVTF[SU][WX][HSD]ri$")>;
+
+// FP convert, from vec to gen reg
+def : InstRW<[V3Write_3c_1V01],
+ (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]ri?$")>;
+
+// FP convert, Javascript from vec to gen reg
+// NOTE(review): this aliases the whole generic WriteFCvt class, so it also
+// applies to any WriteFCvt instruction not overridden by an InstRW entry.
+def : SchedAlias<WriteFCvt, V3Write_3c_1V0>;
+
+// FP convert, from vec to vec reg
+def : InstRW<[V3Write_3c_1V02], (instrs FCVTSHr, FCVTDHr, FCVTHSr, FCVTDSr,
+ FCVTHDr, FCVTSDr, FCVTXNv1i64)>;
+
+// FP move, immed
+// FP move, register
+def : SchedAlias<WriteFImm, V3Write_2c_1V>;
+
+// FP transfer, from gen to low half of vec reg
+// Uses the zero-move variant: V3Write_0c when NeoverseZeroMove matches,
+// V3Write_3c_1M0 otherwise.
+def : InstRW<[V3Write_0or3c_1M0],
+ (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;
+
+// FP transfer, from gen to high half of vec reg
+def : InstRW<[V3Write_5c_1M0_1V], (instrs FMOVXDHighr)>;
+
+// FP transfer, from vec to gen reg
+def : SchedAlias<WriteFCopy, V3Write_2c_2V01>;
+
+// §3.14 FP load instructions
+// -----------------------------------------------------------------------------
+
+// Load vector reg, literal, S/D/Q forms
+def : InstRW<[V3Write_7c_1I_1L], (instregex "^LDR[SDQ]l$")>;
+
+// Load vector reg, unscaled immed
+def : InstRW<[V3Write_6c_1L], (instregex "^LDUR[BHSDQ]i$")>;
+
+// Load vector reg, immed post-index
+// Load vector reg, immed pre-index
+def : InstRW<[WriteAdr, V3Write_6c_1I_1L],
+ (instregex "^LDR[BHSDQ](pre|post)$")>;
+
+// Load vector reg, unsigned immed
+def : InstRW<[V3Write_6c_1L], (instregex "^LDR[BHSDQ]ui$")>;
+
+// Load vector reg, register offset, basic
+// Load vector reg, register offset, scale, S/D-form
+// Load vector reg, register offset, scale, H/Q-form
+// Load vector reg, register offset, extend
+// Load vector reg, register offset, extend, scale, S/D-form
+// Load vector reg, register offset, extend, scale, H/Q-form
+// All six rows share one entry; the V3Write_LdrHQ variant picks between
+// V3Write_7c_1I_1L (NeoverseHQForm) and V3Write_6c_1L at scheduling time.
+def : InstRW<[V3Write_LdrHQ, ReadAdrBase], (instregex "^LDR[BHSDQ]ro[WX]$")>;
+
+// Load vector pair, immed offset, S/D-form
+def : InstRW<[V3Write_6c_1L, WriteLDHi], (instregex "^LDN?P[SD]i$")>;
+
+// Load vector pair, immed offset, Q-form
+def : InstRW<[V3Write_6c_2L, WriteLDHi], (instrs LDPQi, LDNPQi)>;
+
+// Load vector pair, immed post-index, S/D-form
+// Load vector pair, immed pre-index, S/D-form
+def : InstRW<[WriteAdr, V3Write_6c_1I_1L, WriteLDHi],
+ (instregex "^LDP[SD](pre|post)$")>;
+
+// Load vector pair, immed post-index, Q-form
+// Load vector pair, immed pre-index, Q-form
+def : InstRW<[WriteAdr, V3Write_6c_2I_2L, WriteLDHi], (instrs LDPQpost,
+ LDPQpre)>;
+
+// §3.15 FP store instructions
+// -----------------------------------------------------------------------------
+
+// Store vector reg, unscaled immed, B/H/S/D-form
+// Store vector reg, unscaled immed, Q-form
+def : InstRW<[V3Write_2c_1SA_1V01], (instregex "^STUR[BHSDQ]i$")>;
+
+// Store vector reg, immed post-index, B/H/S/D-form
+// Store vector reg, immed post-index, Q-form
+// Store vector reg, immed pre-index, B/H/S/D-form
+// Store vector reg, immed pre-index, Q-form
+def : InstRW<[WriteAdr, V3Write_2c_1SA_1V01_1I],
+ (instregex "^STR[BHSDQ](pre|post)$")>;
+
+// Store vector reg, unsigned immed, B/H/S/D-form
+// Store vector reg, unsigned immed, Q-form
+def : InstRW<[V3Write_2c_1SA_1V01], (instregex "^STR[BHSDQ]ui$")>;
+
+// Store vector reg, register offset, basic, B/H/S/D-form
+// Store vector reg, register offset, basic, Q-form
+// Store vector reg, register offset, scale, H-form
+// Store vector reg, register offset, scale, S/D-form
+// Store vector reg, register offset, scale, Q-form
+// Store vector reg, register offset, extend, B/H/S/D-form
+// Store vector reg, register offset, extend, Q-form
+// Store vector reg, register offset, extend, scale, H-form
+// Store vector reg, register offset, extend, scale, S/D-form
+// Store vector reg, register offset, extend, scale, Q-form
+// All rows share one entry; the V3Write_StrHQ variant picks between
+// V3Write_2c_1SA_1V01_1I (NeoverseHQForm) and V3Write_2c_1SA_1V01.
+def : InstRW<[V3Write_StrHQ, ReadAdrBase],
+ (instregex "^STR[BHSDQ]ro[WX]$")>;
+
+// Store vector pair, immed offset, S-form
+// Store vector pair, immed offset, D-form
+def : InstRW<[V3Write_2c_1SA_1V01], (instregex "^STN?P[SD]i$")>;
+
+// Store vector pair, immed offset, Q-form
+def : InstRW<[V3Write_2c_1SA_2V01], (instrs STPQi, STNPQi)>;
+
+// Store vector pair, immed post-index, S-form
+// Store vector pair, immed post-index, D-form
+// Store vector pair, immed pre-index, S-form
+// Store vector pair, immed pre-index, D-form
+def : InstRW<[WriteAdr, V3Write_2c_1SA_1V01_1I],
+ (instregex "^STP[SD](pre|post)$")>;
+
+// NOTE(review): unlike the S/D-form pre/post entry above, the two Q-form
+// entries below list no WriteAdr - confirm this is intended.
+// Store vector pair, immed post-index, Q-form
+def : InstRW<[V3Write_2c_1SA_2V01_1I], (instrs STPQpost)>;
+
+// Store vector pair, immed pre-index, Q-form
+def : InstRW<[V3Write_2c_1SA_2V01_2I], (instrs STPQpre)>;
+
+// §3.16 ASIMD integer instructions
+// -----------------------------------------------------------------------------
+
+// ASIMD absolute diff
+// ASIMD absolute diff long
+// ASIMD arith, basic
+// ASIMD arith, complex
+// ASIMD arith, pair-wise
+// ASIMD compare
+// ASIMD logical
+// ASIMD max/min, basic and pair-wise
+// D-form (WriteVd) and Q-form (WriteVq) generic classes share one write.
+def : SchedAlias<WriteVd, V3Write_2c_1V>;
+def : SchedAlias<WriteVq, V3Write_2c_1V>;
+
+// ASIMD absolute diff accum
+// ASIMD absolute diff accum long
+// Entries of the form [V3Wr_X, V3Rd_X] pair a forwarded write with its read
+// advance (see "Define forwarded types").
+def : InstRW<[V3Wr_VA, V3Rd_VA], (instregex "^[SU]ABAL?v")>;
+
+// ASIMD arith, reduce, 4H/4S
+def : InstRW<[V3Write_3c_1V13], (instregex "^(ADDV|[SU]ADDLV)v4(i16|i32)v$")>;
+
+// ASIMD arith, reduce, 8B/8H
+def : InstRW<[V3Write_5c_1V13_1V],
+ (instregex "^(ADDV|[SU]ADDLV)v8(i8|i16)v$")>;
+
+// ASIMD arith, reduce, 16B
+def : InstRW<[V3Write_6c_2V13], (instregex "^(ADDV|[SU]ADDLV)v16i8v$")>;
+
+// ASIMD dot product
+// ASIMD dot product using signed and unsigned integers
+def : InstRW<[V3Wr_VDOT, V3Rd_VDOT],
+ (instregex "^([SU]|SU|US)DOT(lane)?(v8|v16)i8$")>;
+
+// ASIMD matrix multiply-accumulate
+def : InstRW<[V3Wr_VMMA, V3Rd_VMMA], (instrs SMMLA, UMMLA, USMMLA)>;
+
+// ASIMD max/min, reduce, 4H/4S
+def : InstRW<[V3Write_3c_1V13], (instregex "^[SU](MAX|MIN)Vv4i16v$",
+ "^[SU](MAX|MIN)Vv4i32v$")>;
+
+// ASIMD max/min, reduce, 8B/8H
+def : InstRW<[V3Write_5c_1V13_1V], (instregex "^[SU](MAX|MIN)Vv8i8v$",
+ "^[SU](MAX|MIN)Vv8i16v$")>;
+
+// ASIMD max/min, reduce, 16B
+// The pattern is written with an explicit '^' like the other max/min reduce
+// patterns; instregex anchors at the start anyway, so the match set is the
+// same.
+def : InstRW<[V3Write_6c_2V13], (instregex "^[SU](MAX|MIN)Vv16i8v$")>;
+
+// ASIMD multiply
+def : InstRW<[V3Write_4c_1V02], (instregex "^MULv", "^SQ(R)?DMULHv")>;
+
+// ASIMD multiply accumulate
+def : InstRW<[V3Wr_VMA, V3Rd_VMA], (instregex "^MLAv", "^MLSv")>;
+
+// ASIMD multiply accumulate high
+def : InstRW<[V3Wr_VMAH, V3Rd_VMAH], (instregex "^SQRDMLAHv", "^SQRDMLSHv")>;
+
+// ASIMD multiply accumulate long
+def : InstRW<[V3Wr_VMAL, V3Rd_VMAL], (instregex "^[SU]MLALv", "^[SU]MLSLv")>;
+
+// ASIMD multiply accumulate saturating long
+// (Plain write: no read advance is attached to this entry.)
+def : InstRW<[V3Write_4c_1V02], (instregex "^SQDML[AS]L[iv]")>;
+
+// ASIMD multiply/multiply long (8x8) polynomial, D-form
+// ASIMD multiply/multiply long (8x8) polynomial, Q-form
+def : InstRW<[V3Write_3c_1V], (instregex "^PMULL?(v8i8|v16i8)$")>;
+
+// ASIMD multiply long
+def : InstRW<[V3Write_3c_1V02], (instregex "^[SU]MULLv", "^SQDMULL[iv]")>;
+
+// ASIMD pairwise add and accumulate long
+def : InstRW<[V3Wr_VPA, V3Rd_VPA], (instregex "^[SU]ADALPv")>;
+
+// ASIMD shift accumulate
+def : InstRW<[V3Wr_VSA, V3Rd_VSA], (instregex "^[SU]SRA[dv]", "^[SU]RSRA[dv]")>;
+
+// ASIMD shift by immed, basic
+def : InstRW<[V3Write_2c_1V], (instregex "^SHL[dv]", "^SHLLv", "^SHRNv",
+ "^SSHLLv", "^SSHR[dv]", "^USHLLv",
+ "^USHR[dv]")>;
+
+// ASIMD shift by immed and insert, basic
+def : InstRW<[V3Write_2c_1V], (instregex "^SLI[dv]", "^SRI[dv]")>;
+
+// ASIMD shift by immed, complex
+def : InstRW<[V3Write_4c_1V],
+ (instregex "^RSHRNv", "^SQRSHRU?N[bhsv]", "^(SQSHLU?|UQSHL)[bhsd]$",
+ "^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$",
+ "^SQSHRU?N[bhsv]", "^SRSHR[dv]", "^UQRSHRN[bhsv]",
+ "^UQSHRN[bhsv]", "^URSHR[dv]")>;
+
+// ASIMD shift by register, basic
+def : InstRW<[V3Write_2c_1V], (instregex "^[SU]SHLv")>;
+
+// ASIMD shift by register, complex
+// The [SU]QSHL pattern spells out every vector arrangement and ends in '$',
+// so it does not match the "_shift" (immediate) opcodes handled above.
+def : InstRW<[V3Write_4c_1V],
+ (instregex "^[SU]RSHLv", "^[SU]QRSHLv",
+ "^[SU]QSHL(v1i8|v1i16|v1i32|v1i64|v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)$")>;
+
+// §3.17 ASIMD floating-point instructions
+// -----------------------------------------------------------------------------
+
+// ASIMD FP absolute value/difference
+// ASIMD FP arith, normal
+// ASIMD FP compare
+// ASIMD FP complex add
+// ASIMD FP max/min, normal
+// ASIMD FP max/min, pairwise
+// ASIMD FP negate
+// Handled by SchedAlias<WriteV[dq], ...>
+
+// ASIMD FP complex multiply add
+def : InstRW<[V3Wr_VFCMA, V3Rd_VFCMA], (instregex "^FCMLAv")>;
+
+// ASIMD FP convert, long (F16 to F32)
+def : InstRW<[V3Write_4c_2V02], (instregex "^FCVTL(v4|v8)i16")>;
+
+// ASIMD FP convert, long (F32 to F64)
+def : InstRW<[V3Write_3c_1V02], (instregex "^FCVTL(v2|v4)i32")>;
+
+// ASIMD FP convert, narrow (F32 to F16)
+def : InstRW<[V3Write_4c_2V02], (instregex "^FCVTN(v4|v8)i16")>;
+
+// ASIMD FP convert, narrow (F64 to F32)
+def : InstRW<[V3Write_3c_1V02], (instregex "^FCVTN(v2|v4)i32",
+ "^FCVTXN(v2|v4)f32")>;
+
+// The three "convert, other" groups below use the same eight pattern shapes
+// (FCVT*/[SU]CVTF, vector, _shift and scalar forms) and differ only in the
+// element-count/size selected and in the write assigned.
+
+// ASIMD FP convert, other, D-form F32 and Q-form F64
+def : InstRW<[V3Write_3c_1V02], (instregex "^FCVT[AMNPZ][SU]v2f(32|64)$",
+ "^FCVT[AMNPZ][SU]v2i(32|64)_shift$",
+ "^FCVT[AMNPZ][SU]v1i64$",
+ "^FCVTZ[SU]d$",
+ "^[SU]CVTFv2f(32|64)$",
+ "^[SU]CVTFv2i(32|64)_shift$",
+ "^[SU]CVTFv1i64$",
+ "^[SU]CVTFd$")>;
+
+// ASIMD FP convert, other, D-form F16 and Q-form F32
+def : InstRW<[V3Write_4c_2V02], (instregex "^FCVT[AMNPZ][SU]v4f(16|32)$",
+ "^FCVT[AMNPZ][SU]v4i(16|32)_shift$",
+ "^FCVT[AMNPZ][SU]v1i32$",
+ "^FCVTZ[SU]s$",
+ "^[SU]CVTFv4f(16|32)$",
+ "^[SU]CVTFv4i(16|32)_shift$",
+ "^[SU]CVTFv1i32$",
+ "^[SU]CVTFs$")>;
+
+// ASIMD FP convert, other, Q-form F16
+def : InstRW<[V3Write_6c_4V02], (instregex "^FCVT[AMNPZ][SU]v8f16$",
+ "^FCVT[AMNPZ][SU]v8i16_shift$",
+ "^FCVT[AMNPZ][SU]v1f16$",
+ "^FCVTZ[SU]h$",
+ "^[SU]CVTFv8f16$",
+ "^[SU]CVTFv8i16_shift$",
+ "^[SU]CVTFv1i16$",
+ "^[SU]CVTFh$")>;
+
+// The divide entries below and the square-root entries further down use the
+// "_<N>rc" writes, which hold V3UnitV1 for N cycles via ReleaseAtCycles; each
+// divide form uses the same write as the matching square-root form.
+
+// ASIMD FP divide, D-form, F16
+def : InstRW<[V3Write_9c_1V1_4rc], (instrs FDIVv4f16)>;
+
+// ASIMD FP divide, D-form, F32
+def : InstRW<[V3Write_9c_1V1_2rc], (instrs FDIVv2f32)>;
+
+// ASIMD FP divide, Q-form, F16
+def : InstRW<[V3Write_13c_1V1_8rc], (instrs FDIVv8f16)>;
+
+// ASIMD FP divide, Q-form, F32
+def : InstRW<[V3Write_11c_1V1_4rc], (instrs FDIVv4f32)>;
+
+// ASIMD FP divide, Q-form, F64
+def : InstRW<[V3Write_14c_1V1_2rc], (instrs FDIVv2f64)>;
+
+// ASIMD FP max/min, reduce, F32 and D-form F16
+def : InstRW<[V3Write_4c_2V], (instregex "^(FMAX|FMIN)(NM)?Vv4(i16|i32)v$")>;
+
+// ASIMD FP max/min, reduce, Q-form F16
+def : InstRW<[V3Write_6c_3V], (instregex "^(FMAX|FMIN)(NM)?Vv8i16v$")>;
+
+// ASIMD FP multiply
+// (No read advance here; V3Wr_VFM is listed as a producer in V3Rd_VFMA.)
+def : InstRW<[V3Wr_VFM], (instregex "^FMULv", "^FMULXv")>;
+
+// ASIMD FP multiply accumulate
+def : InstRW<[V3Wr_VFMA, V3Rd_VFMA], (instregex "^FMLAv", "^FMLSv")>;
+
+// ASIMD FP multiply accumulate long
+def : InstRW<[V3Wr_VFMAL, V3Rd_VFMAL], (instregex "^FML[AS]L2?(lane)?v")>;
+
+// ASIMD FP round, D-form F32 and Q-form F64
+def : InstRW<[V3Write_3c_1V02],
+ (instregex "^FRINT[AIMNPXZ]v2f(32|64)$",
+ "^FRINT(32|64)[XZ]v2f(32|64)$")>;
+
+// ASIMD FP round, D-form F16 and Q-form F32
+def : InstRW<[V3Write_4c_2V02],
+ (instregex "^FRINT[AIMNPXZ]v4f(16|32)$",
+ "^FRINT(32|64)[XZ]v4f32$")>;
+
+// ASIMD FP round, Q-form F16
+def : InstRW<[V3Write_6c_4V02], (instregex "^FRINT[AIMNPXZ]v8f16$")>;
+
+// ASIMD FP square root, D-form, F16
+def : InstRW<[V3Write_9c_1V1_4rc], (instrs FSQRTv4f16)>;
+
+// ASIMD FP square root, D-form, F32
+def : InstRW<[V3Write_9c_1V1_2rc], (instrs FSQRTv2f32)>;
+
+// ASIMD FP square root, Q-form, F16
+def : InstRW<[V3Write_13c_1V1_8rc], (instrs FSQRTv8f16)>;
+
+// ASIMD FP square root, Q-form, F32
+def : InstRW<[V3Write_11c_1V1_4rc], (instrs FSQRTv4f32)>;
+
+// ASIMD FP square root, Q-form, F64
+def : InstRW<[V3Write_14c_1V1_2rc], (instrs FSQRTv2f64)>;
+
+// §3.18 ASIMD BFloat16 (BF16) instructions
+// -----------------------------------------------------------------------------
+
+// ASIMD convert, F32 to BF16
+def : InstRW<[V3Write_4c_2V02], (instrs BFCVTN, BFCVTN2)>;
+
+// The dot-product, matrix and multiply-accumulate entries below each pair a
+// forwarded V3Wr_VBF* write with its V3Rd_VBF* read advance.
+
+// ASIMD dot product
+def : InstRW<[V3Wr_VBFDOT, V3Rd_VBFDOT], (instrs BFDOTv4bf16, BFDOTv8bf16)>;
+
+// ASIMD matrix multiply accumulate
+def : InstRW<[V3Wr_VBFMMA, V3Rd_VBFMMA], (instrs BFMMLA)>;
+
+// ASIMD multiply accumulate long
+def : InstRW<[V3Wr_VBFMAL, V3Rd_VBFMAL], (instrs BFMLALB, BFMLALBIdx, BFMLALT,
+ BFMLALTIdx)>;
+
+// Scalar convert, F32 to BF16
+def : InstRW<[V3Write_3c_1V02], (instrs BFCVT)>;
+
+// §3.19 ASIMD miscellaneous instructions
+// -----------------------------------------------------------------------------
+
+// ASIMD bit reverse
+// ASIMD bitwise insert
+// ASIMD count
+// ASIMD duplicate, element
+// ASIMD extract
+// ASIMD extract narrow
+// ASIMD insert, element to element
+// ASIMD move, FP immed
+// ASIMD move, integer immed
+// ASIMD reverse
+// ASIMD table lookup extension, 1 table reg
+// ASIMD transpose
+// ASIMD unzip/zip
+// Handled by SchedAlias<WriteV[dq], ...>
+// Exception: MOVID and MOVIv2d_ns use the zero-move variant (V3Write_0c when
+// NeoverseZeroMove matches, V3Write_2c_1V otherwise).
+def : InstRW<[V3Write_0or2c_1V], (instrs MOVID, MOVIv2d_ns)>;
+
+// ASIMD duplicate, gen reg
+def : InstRW<[V3Write_3c_1M0], (instregex "^DUPv.+gpr")>;
+
+// ASIMD extract narrow, saturating
+def : InstRW<[V3Write_4c_1V], (instregex "^[SU]QXTNv", "^SQXTUNv")>;
+
+// ASIMD reciprocal and square root estimate, D-form U32
+def : InstRW<[V3Write_3c_1V02], (instrs URECPEv2i32, URSQRTEv2i32)>;
+
+// ASIMD reciprocal and square root estimate, Q-form U32
+def : InstRW<[V3Write_4c_2V02], (instrs URECPEv4i32, URSQRTEv4i32)>;
+
+// ASIMD reciprocal and square root estimate, D-form F32 and scalar forms
+def : InstRW<[V3Write_3c_1V02], (instrs FRECPEv1f16, FRECPEv1i32,
+ FRECPEv1i64, FRECPEv2f32,
+ FRSQRTEv1f16, FRSQRTEv1i32,
+ FRSQRTEv1i64, FRSQRTEv2f32)>;
+
+// ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32
+def : InstRW<[V3Write_4c_2V02], (instrs FRECPEv4f16, FRECPEv4f32,
+ FRSQRTEv4f16, FRSQRTEv4f32)>;
+
+// ASIMD reciprocal and square root estimate, Q-form F16
+def : InstRW<[V3Write_6c_4V02], (instrs FRECPEv8f16, FRSQRTEv8f16)>;
+
+// ASIMD reciprocal exponent
+def : InstRW<[V3Write_3c_1V02], (instregex "^FRECPXv")>;
+
+// ASIMD reciprocal step
+def : InstRW<[V3Write_4c_1V], (instregex "^FRECPS(32|64|v)",
+ "^FRSQRTS(32|64|v)")>;
+
+// ASIMD table lookup, 1 or 2 table regs
+def : InstRW<[V3Write_2c_1V], (instrs TBLv8i8One, TBLv16i8One,
+ TBLv8i8Two, TBLv16i8Two)>;
+
+// ASIMD table lookup, 3 table regs
+def : InstRW<[V3Write_4c_2V], (instrs TBLv8i8Three, TBLv16i8Three)>;
+
+// ASIMD table lookup, 4 table regs
+def : InstRW<[V3Write_4c_3V], (instrs TBLv8i8Four, TBLv16i8Four)>;
+
+// ASIMD table lookup extension, 2 table reg
+def : InstRW<[V3Write_4c_2V], (instrs TBXv8i8Two, TBXv16i8Two)>;
+
+// ASIMD table lookup extension, 3 table reg
+def : InstRW<[V3Write_6c_3V], (instrs TBXv8i8Three, TBXv16i8Three)>;
+
+// ASIMD table lookup extension, 4 table reg
+def : InstRW<[V3Write_6c_5V], (instrs TBXv8i8Four, TBXv16i8Four)>;
+
+// ASIMD transfer, element to gen reg
+def : InstRW<[V3Write_2c_2V01], (instregex "^[SU]MOVv")>;
+
+// ASIMD transfer, gen reg to element
+def : InstRW<[V3Write_5c_1M0_1V], (instregex "^INSvi(8|16|32|64)gpr$")>;
+
+// §3.20 ASIMD load instructions
+// -----------------------------------------------------------------------------
+
+// Each row below has two entries: the plain opcode and its _POST
+// (post-indexed) form, which lists WriteAdr ahead of the same load write.
+// D-form and Q-form rows with the same register count use the same write.
+
+// ASIMD load, 1 element, multiple, 1 reg, D-form
+def : InstRW<[V3Write_6c_1L], (instregex "^LD1Onev(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteAdr, V3Write_6c_1L],
+ (instregex "^LD1Onev(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 1 reg, Q-form
+def : InstRW<[V3Write_6c_1L], (instregex "^LD1Onev(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, V3Write_6c_1L],
+ (instregex "^LD1Onev(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 2 reg, D-form
+def : InstRW<[V3Write_6c_2L], (instregex "^LD1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteAdr, V3Write_6c_2L],
+ (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 2 reg, Q-form
+def : InstRW<[V3Write_6c_2L], (instregex "^LD1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, V3Write_6c_2L],
+ (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 3 reg, D-form
+def : InstRW<[V3Write_6c_3L], (instregex "^LD1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteAdr, V3Write_6c_3L],
+ (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 3 reg, Q-form
+def : InstRW<[V3Write_6c_3L], (instregex "^LD1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, V3Write_6c_3L],
+ (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 4 reg, D-form
+def : InstRW<[V3Write_7c_4L], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteAdr, V3Write_7c_4L],
+ (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 4 reg, Q-form
+def : InstRW<[V3Write_7c_4L], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, V3Write_7c_4L],
+ (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 1 element, one lane, B/H/S
+// ASIMD load, 1 element, one lane, D
+// The _POST forms list WriteAdr ahead of the load write.
+def : InstRW<[V3Write_8c_1L_1V], (instregex "^LD1i(8|16|32|64)$")>;
+def : InstRW<[WriteAdr, V3Write_8c_1L_1V],
+             (instregex "^LD1i(8|16|32|64)_POST$")>;
+
+// ASIMD load, 1 element, all lanes, D-form, B/H/S
+// ASIMD load, 1 element, all lanes, D-form, D
+def : InstRW<[V3Write_8c_1L_1V], (instregex "^LD1Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteAdr, V3Write_8c_1L_1V],
+             (instregex "^LD1Rv(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD load, 1 element, all lanes, Q-form
+def : InstRW<[V3Write_8c_1L_1V], (instregex "^LD1Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, V3Write_8c_1L_1V],
+             (instregex "^LD1Rv(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 2 element, multiple, D-form, B/H/S
+// The _POST forms list WriteAdr ahead of the load write.
+def : InstRW<[V3Write_8c_1L_2V], (instregex "^LD2Twov(8b|4h|2s)$")>;
+def : InstRW<[WriteAdr, V3Write_8c_1L_2V],
+             (instregex "^LD2Twov(8b|4h|2s)_POST$")>;
+
+// ASIMD load, 2 element, multiple, Q-form, B/H/S
+// ASIMD load, 2 element, multiple, Q-form, D
+def : InstRW<[V3Write_8c_2L_2V], (instregex "^LD2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, V3Write_8c_2L_2V],
+             (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 2 element, one lane, B/H
+// ASIMD load, 2 element, one lane, S
+// ASIMD load, 2 element, one lane, D
+def : InstRW<[V3Write_8c_1L_2V], (instregex "^LD2i(8|16|32|64)$")>;
+def : InstRW<[WriteAdr, V3Write_8c_1L_2V],
+             (instregex "^LD2i(8|16|32|64)_POST$")>;
+
+// ASIMD load, 2 element, all lanes, D-form, B/H/S
+// ASIMD load, 2 element, all lanes, D-form, D
+def : InstRW<[V3Write_8c_1L_2V], (instregex "^LD2Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteAdr, V3Write_8c_1L_2V],
+             (instregex "^LD2Rv(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD load, 2 element, all lanes, Q-form
+def : InstRW<[V3Write_8c_1L_2V], (instregex "^LD2Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, V3Write_8c_1L_2V],
+             (instregex "^LD2Rv(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 3 element, multiple, D-form, B/H/S
+// The _POST forms list WriteAdr ahead of the load write.
+def : InstRW<[V3Write_8c_2L_3V], (instregex "^LD3Threev(8b|4h|2s)$")>;
+def : InstRW<[WriteAdr, V3Write_8c_2L_3V],
+             (instregex "^LD3Threev(8b|4h|2s)_POST$")>;
+
+// ASIMD load, 3 element, multiple, Q-form, B/H/S
+// ASIMD load, 3 element, multiple, Q-form, D
+def : InstRW<[V3Write_8c_3L_3V], (instregex "^LD3Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, V3Write_8c_3L_3V],
+             (instregex "^LD3Threev(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 3 element, one lane, B/H
+// ASIMD load, 3 element, one lane, S
+// ASIMD load, 3 element, one lane, D
+def : InstRW<[V3Write_8c_2L_3V], (instregex "^LD3i(8|16|32|64)$")>;
+def : InstRW<[WriteAdr, V3Write_8c_2L_3V],
+             (instregex "^LD3i(8|16|32|64)_POST$")>;
+
+// ASIMD load, 3 element, all lanes, D-form, B/H/S
+// ASIMD load, 3 element, all lanes, D-form, D
+def : InstRW<[V3Write_8c_2L_3V], (instregex "^LD3Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteAdr, V3Write_8c_2L_3V],
+             (instregex "^LD3Rv(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD load, 3 element, all lanes, Q-form, B/H/S
+// ASIMD load, 3 element, all lanes, Q-form, D
+def : InstRW<[V3Write_8c_3L_3V], (instregex "^LD3Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, V3Write_8c_3L_3V],
+             (instregex "^LD3Rv(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 4 element, multiple, D-form, B/H/S
+// The _POST forms list WriteAdr ahead of the load write.
+def : InstRW<[V3Write_8c_3L_4V], (instregex "^LD4Fourv(8b|4h|2s)$")>;
+def : InstRW<[WriteAdr, V3Write_8c_3L_4V],
+             (instregex "^LD4Fourv(8b|4h|2s)_POST$")>;
+
+// ASIMD load, 4 element, multiple, Q-form, B/H/S
+// ASIMD load, 4 element, multiple, Q-form, D
+def : InstRW<[V3Write_9c_6L_4V], (instregex "^LD4Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, V3Write_9c_6L_4V],
+             (instregex "^LD4Fourv(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 4 element, one lane, B/H
+// ASIMD load, 4 element, one lane, S
+// ASIMD load, 4 element, one lane, D
+def : InstRW<[V3Write_8c_3L_4V], (instregex "^LD4i(8|16|32|64)$")>;
+def : InstRW<[WriteAdr, V3Write_8c_3L_4V],
+             (instregex "^LD4i(8|16|32|64)_POST$")>;
+
+// ASIMD load, 4 element, all lanes, D-form, B/H/S
+// ASIMD load, 4 element, all lanes, D-form, D
+def : InstRW<[V3Write_8c_3L_4V], (instregex "^LD4Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteAdr, V3Write_8c_3L_4V],
+             (instregex "^LD4Rv(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD load, 4 element, all lanes, Q-form, B/H/S
+// ASIMD load, 4 element, all lanes, Q-form, D
+def : InstRW<[V3Write_8c_4L_4V], (instregex "^LD4Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, V3Write_8c_4L_4V],
+             (instregex "^LD4Rv(16b|8h|4s|2d)_POST$")>;
+
+// §3.21 ASIMD store instructions
+// -----------------------------------------------------------------------------
+
+// ASIMD store, 1 element, multiple, 1 reg, D-form
+// The _POST forms list WriteAdr ahead of the store write.
+def : InstRW<[V3Write_2c_1SA_1V01], (instregex "^ST1Onev(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteAdr, V3Write_2c_1SA_1V01],
+             (instregex "^ST1Onev(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 1 reg, Q-form
+def : InstRW<[V3Write_2c_1SA_1V01], (instregex "^ST1Onev(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, V3Write_2c_1SA_1V01],
+             (instregex "^ST1Onev(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 2 reg, D-form
+def : InstRW<[V3Write_2c_1SA_1V01], (instregex "^ST1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteAdr, V3Write_2c_1SA_1V01],
+             (instregex "^ST1Twov(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 2 reg, Q-form
+def : InstRW<[V3Write_2c_2SA_2V01], (instregex "^ST1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, V3Write_2c_2SA_2V01],
+             (instregex "^ST1Twov(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 3 reg, D-form
+def : InstRW<[V3Write_2c_2SA_2V01], (instregex "^ST1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteAdr, V3Write_2c_2SA_2V01],
+             (instregex "^ST1Threev(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 3 reg, Q-form
+def : InstRW<[V3Write_2c_3SA_3V01], (instregex "^ST1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, V3Write_2c_3SA_3V01],
+             (instregex "^ST1Threev(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 4 reg, D-form
+def : InstRW<[V3Write_2c_2SA_2V01], (instregex "^ST1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteAdr, V3Write_2c_2SA_2V01],
+             (instregex "^ST1Fourv(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 4 reg, Q-form
+def : InstRW<[V3Write_2c_4SA_4V01], (instregex "^ST1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, V3Write_2c_4SA_4V01],
+             (instregex "^ST1Fourv(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, one lane, B/H/S
+// ASIMD store, 1 element, one lane, D
+def : InstRW<[V3Write_4c_1SA_2V01], (instregex "^ST1i(8|16|32|64)$")>;
+def : InstRW<[WriteAdr, V3Write_4c_1SA_2V01],
+             (instregex "^ST1i(8|16|32|64)_POST$")>;
+
+// ASIMD store, 2 element, multiple, D-form, B/H/S
+// The _POST forms list WriteAdr ahead of the store write.
+def : InstRW<[V3Write_4c_1SA_2V01], (instregex "^ST2Twov(8b|4h|2s)$")>;
+def : InstRW<[WriteAdr, V3Write_4c_1SA_2V01],
+             (instregex "^ST2Twov(8b|4h|2s)_POST$")>;
+
+// ASIMD store, 2 element, multiple, Q-form, B/H/S
+// ASIMD store, 2 element, multiple, Q-form, D
+def : InstRW<[V3Write_4c_2SA_4V01], (instregex "^ST2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, V3Write_4c_2SA_4V01],
+             (instregex "^ST2Twov(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 2 element, one lane, B/H/S
+// ASIMD store, 2 element, one lane, D
+def : InstRW<[V3Write_4c_1SA_2V01], (instregex "^ST2i(8|16|32|64)$")>;
+def : InstRW<[WriteAdr, V3Write_4c_1SA_2V01],
+             (instregex "^ST2i(8|16|32|64)_POST$")>;
+
+// ASIMD store, 3 element, multiple, D-form, B/H/S
+// The _POST forms list WriteAdr ahead of the store write.
+def : InstRW<[V3Write_5c_2SA_4V01], (instregex "^ST3Threev(8b|4h|2s)$")>;
+def : InstRW<[WriteAdr, V3Write_5c_2SA_4V01],
+             (instregex "^ST3Threev(8b|4h|2s)_POST$")>;
+
+// ASIMD store, 3 element, multiple, Q-form, B/H/S
+// ASIMD store, 3 element, multiple, Q-form, D
+def : InstRW<[V3Write_6c_3SA_6V01], (instregex "^ST3Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, V3Write_6c_3SA_6V01],
+             (instregex "^ST3Threev(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 3 element, one lane, B/H
+// ASIMD store, 3 element, one lane, S
+// ASIMD store, 3 element, one lane, D
+def : InstRW<[V3Write_5c_2SA_4V01], (instregex "^ST3i(8|16|32|64)$")>;
+def : InstRW<[WriteAdr, V3Write_5c_2SA_4V01],
+             (instregex "^ST3i(8|16|32|64)_POST$")>;
+
+// ASIMD store, 4 element, multiple, D-form, B/H/S
+// The _POST forms list WriteAdr ahead of the store write.
+def : InstRW<[V3Write_6c_2SA_6V01], (instregex "^ST4Fourv(8b|4h|2s)$")>;
+def : InstRW<[WriteAdr, V3Write_6c_2SA_6V01],
+             (instregex "^ST4Fourv(8b|4h|2s)_POST$")>;
+
+// ASIMD store, 4 element, multiple, Q-form, B/H/S
+def : InstRW<[V3Write_7c_4SA_12V01], (instregex "^ST4Fourv(16b|8h|4s)$")>;
+def : InstRW<[WriteAdr, V3Write_7c_4SA_12V01],
+             (instregex "^ST4Fourv(16b|8h|4s)_POST$")>;
+
+// ASIMD store, 4 element, multiple, Q-form, D
+def : InstRW<[V3Write_5c_4SA_8V01], (instregex "^ST4Fourv(2d)$")>;
+def : InstRW<[WriteAdr, V3Write_5c_4SA_8V01],
+             (instregex "^ST4Fourv(2d)_POST$")>;
+
+// ASIMD store, 4 element, one lane, B/H/S
+def : InstRW<[V3Write_6c_1SA_3V01], (instregex "^ST4i(8|16|32)$")>;
+def : InstRW<[WriteAdr, V3Write_6c_1SA_3V01],
+             (instregex "^ST4i(8|16|32)_POST$")>;
+
+// ASIMD store, 4 element, one lane, D
+def : InstRW<[V3Write_4c_2SA_4V01], (instregex "^ST4i(64)$")>;
+def : InstRW<[WriteAdr, V3Write_4c_2SA_4V01],
+             (instregex "^ST4i(64)_POST$")>;
+
+// §3.22 Cryptography extensions
+// -----------------------------------------------------------------------------
+
+// Crypto AES ops
+def : InstRW<[V3Write_2c_1V], (instregex "^AES[DE]rr$", "^AESI?MCrr")>;
+
+// Crypto polynomial (64x64) multiply long
+def : InstRW<[V3Write_2c_1V], (instrs PMULLv1i64, PMULLv2i64)>;
+
+// Crypto SHA1 hash acceleration op
+// Crypto SHA1 schedule acceleration ops
+def : InstRW<[V3Write_2c_1V0], (instregex "^SHA1(H|SU0|SU1)")>;
+
+// Crypto SHA1 hash acceleration ops
+// Crypto SHA256 hash acceleration ops
+def : InstRW<[V3Write_4c_1V0], (instregex "^SHA1[CMP]", "^SHA256H2?")>;
+
+// Crypto SHA256 schedule acceleration ops
+def : InstRW<[V3Write_2c_1V0], (instregex "^SHA256SU[01]")>;
+
+// Crypto SHA512 hash acceleration ops
+def : InstRW<[V3Write_2c_1V0], (instregex "^SHA512(H|H2|SU0|SU1)")>;
+
+// Crypto SHA3 ops
+def : InstRW<[V3Write_2c_1V], (instrs BCAX, EOR3, RAX1, XAR)>;
+
+// Crypto SM3 ops
+def : InstRW<[V3Write_2c_1V0], (instregex "^SM3PARTW[12]$", "^SM3SS1$",
+ "^SM3TT[12][AB]$")>;
+
+// Crypto SM4 ops
+def : InstRW<[V3Write_4c_1V0], (instrs SM4E, SM4ENCKEY)>;
+
+// §3.23 CRC
+// -----------------------------------------------------------------------------
+
+// CRC checksum ops: every opcode whose name starts with "CRC32" is assigned
+// the V3Wr_CRC / V3Rd_CRC pair.
+def : InstRW<[V3Wr_CRC, V3Rd_CRC], (instregex "^CRC32")>;
+
+// §3.24 SVE Predicate instructions
+// -----------------------------------------------------------------------------
+
+// Loop control, based on predicate
+def : InstRW<[V3Write_2or3c_1M], (instrs BRKA_PPmP, BRKA_PPzP,
+ BRKB_PPmP, BRKB_PPzP)>;
+
+// Loop control, based on predicate and flag setting
+def : InstRW<[V3Write_2or3c_1M], (instrs BRKAS_PPzP, BRKBS_PPzP)>;
+
+// Loop control, propagating
+def : InstRW<[V3Write_2or3c_1M], (instrs BRKN_PPzP, BRKPA_PPzPP,
+ BRKPB_PPzPP)>;
+
+// Loop control, propagating and flag setting
+def : InstRW<[V3Write_2or3c_1M], (instrs BRKNS_PPzP, BRKPAS_PPzPP,
+ BRKPBS_PPzPP)>;
+
+// Loop control, based on GPR
+def : InstRW<[V3Write_3c_2M],
+ (instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]")>;
+def : InstRW<[V3Write_3c_2M], (instregex "^WHILE(RW|WR)_PXX_[BHSD]")>;
+
+// Loop terminate
+def : InstRW<[V3Write_1c_2M], (instregex "^CTERM(EQ|NE)_(WW|XX)")>;
+
+// Predicate counting scalar
+def : InstRW<[V3Write_2c_1M], (instrs ADDPL_XXI, ADDVL_XXI, RDVLI_XI)>;
+def : InstRW<[V3Write_2c_1M],
+ (instregex "^(CNT|SQDEC|SQINC|UQDEC|UQINC)[BHWD]_XPiI",
+ "^SQ(DEC|INC)[BHWD]_XPiWdI",
+ "^UQ(DEC|INC)[BHWD]_WPiI")>;
+
+// Predicate counting scalar, ALL, {1,2,4}
+def : InstRW<[V3Write_IncDec], (instregex "^(DEC|INC)[BHWD]_XPiI")>;
+
+// Predicate counting scalar, active predicate
+def : InstRW<[V3Write_2c_1M],
+ (instregex "^CNTP_XPP_[BHSD]",
+ "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]",
+ "^(UQDEC|UQINC)P_WP_[BHSD]",
+ "^(SQDEC|SQINC)P_XPWd_[BHSD]")>;
+
+// Predicate counting vector, active predicate
+def : InstRW<[V3Write_7c_1M_1M0_1V],
+ (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]")>;
+
+// Predicate logical
+def : InstRW<[V3Write_1or2c_1M],
+ (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP")>;
+
+// Predicate logical, flag setting
+def : InstRW<[V3Write_1or2c_1M],
+ (instregex "^(ANDS|BICS|EORS|NANDS|NORS|ORNS|ORRS)_PPzPP")>;
+
+// Predicate reverse
+def : InstRW<[V3Write_2c_1M], (instregex "^REV_PP_[BHSD]")>;
+
+// Predicate select
+def : InstRW<[V3Write_1c_1M], (instrs SEL_PPPP)>;
+
+// Predicate set
+def : InstRW<[V3Write_2c_1M], (instregex "^PFALSE", "^PTRUE_[BHSD]")>;
+
+// Predicate set/initialize, set flags
+def : InstRW<[V3Write_2c_1M], (instregex "^PTRUES_[BHSD]")>;
+
+// Predicate find first/next
+def : InstRW<[V3Write_2c_1M], (instregex "^PFIRST_B", "^PNEXT_[BHSD]")>;
+
+// Predicate test
+def : InstRW<[V3Write_1c_1M], (instrs PTEST_PP)>;
+
+// Predicate transpose
+def : InstRW<[V3Write_2c_1M], (instregex "^TRN[12]_PPP_[BHSD]")>;
+
+// Predicate unpack and widen
+def : InstRW<[V3Write_2c_1M], (instrs PUNPKHI_PP, PUNPKLO_PP)>;
+
+// Predicate zip/unzip
+def : InstRW<[V3Write_2c_1M], (instregex "^(ZIP|UZP)[12]_PPP_[BHSD]")>;
+
+// §3.25 SVE integer instructions
+// -----------------------------------------------------------------------------
+
+// Arithmetic, absolute diff
+def : InstRW<[V3Write_2c_1V], (instregex "^[SU]ABD_ZPmZ_[BHSD]",
+ "^[SU]ABD_ZPZZ_[BHSD]")>;
+
+// Arithmetic, absolute diff accum
+def : InstRW<[V3Wr_ZA, V3Rd_ZA], (instregex "^[SU]ABA_ZZZ_[BHSD]")>;
+
+// Arithmetic, absolute diff accum long
+def : InstRW<[V3Wr_ZA, V3Rd_ZA], (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]")>;
+
+// Arithmetic, absolute diff long
+def : InstRW<[V3Write_2c_1V], (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]")>;
+
+// Arithmetic, basic
+def : InstRW<[V3Write_2c_1V],
+ (instregex "^(ABS|ADD|CNOT|NEG|SUB|SUBR)_ZPmZ_[BHSD]",
+ "^(ADD|SUB)_ZZZ_[BHSD]",
+ "^(ADD|SUB|SUBR)_ZPZZ_[BHSD]",
+ "^(ADD|SUB|SUBR)_ZI_[BHSD]",
+ "^ADR_[SU]XTW_ZZZ_D_[0123]",
+ "^ADR_LSL_ZZZ_[SD]_[0123]",
+ "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]",
+ "^SADDLBT_ZZZ_[HSD]",
+ "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]",
+ "^SSUBL(BT|TB)_ZZZ_[HSD]")>;
+
+// Arithmetic, complex
+def : InstRW<[V3Write_2c_1V],
+ (instregex "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]",
+ "^SQ(ABS|ADD|NEG|SUB|SUBR)_ZPmZ_[BHSD]",
+ "^[SU]Q(ADD|SUB)_ZZZ_[BHSD]",
+ "^[SU]Q(ADD|SUB)_ZI_[BHSD]",
+ "^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]",
+ "^(UQSUB|UQSUBR)_ZPmZ_[BHSD]")>;
+
+// Arithmetic, large integer
+def : InstRW<[V3Write_2c_1V], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]")>;
+
+// Arithmetic, pairwise add
+def : InstRW<[V3Write_2c_1V], (instregex "^ADDP_ZPmZ_[BHSD]")>;
+
+// Arithmetic, pairwise add and accum long
+def : InstRW<[V3Wr_ZPA, ReadDefault, V3Rd_ZPA],
+ (instregex "^[SU]ADALP_ZPmZ_[HSD]")>;
+
+// Arithmetic, shift
+def : InstRW<[V3Write_2c_1V13],
+ (instregex "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]",
+ "^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]",
+ "^(ASR|LSL|LSR)_ZPmI_[BHSD]",
+ "^(ASR|LSL|LSR)_ZPmZ_[BHSD]",
+ "^(ASR|LSL|LSR)_ZZI_[BHSD]",
+ "^(ASR|LSL|LSR)_ZPZ[IZ]_[BHSD]",
+ "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]")>;
+
+// Arithmetic, shift and accumulate
+def : InstRW<[V3Wr_ZSA, V3Rd_ZSA], (instregex "^[SU]R?SRA_ZZI_[BHSD]")>;
+
+// Arithmetic, shift by immediate
+def : InstRW<[V3Write_2c_1V], (instregex "^SHRN[BT]_ZZI_[BHS]",
+ "^[SU]SHLL[BT]_ZZI_[HSD]")>;
+
+// Arithmetic, shift by immediate and insert
+def : InstRW<[V3Write_2c_1V], (instregex "^(SLI|SRI)_ZZI_[BHSD]")>;
+
+// Arithmetic, shift complex
+def : InstRW<[V3Write_4c_1V],
+ (instregex "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]",
+ "^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_ZPmZ_[BHSD]",
+ "^[SU]QR?SHL_ZPZZ_[BHSD]",
+ "^(SQSHL|SQSHLU|UQSHL)_(ZPmI|ZPZI)_[BHSD]",
+ "^SQSHRU?N[BT]_ZZI_[BHS]",
+ "^UQR?SHRN[BT]_ZZI_[BHS]")>;
+
+// Arithmetic, shift right for divide
+def : InstRW<[V3Write_4c_1V], (instregex "^ASRD_(ZPmI|ZPZI)_[BHSD]")>;
+
+// Arithmetic, shift rounding
+def : InstRW<[V3Write_4c_1V], (instregex "^[SU]RSHLR?_ZPmZ_[BHSD]",
+ "^[SU]RSHL_ZPZZ_[BHSD]",
+ "^[SU]RSHR_(ZPmI|ZPZI)_[BHSD]")>;
+
+// Bit manipulation
+def : InstRW<[V3Write_6c_2V1], (instregex "^(BDEP|BEXT|BGRP)_ZZZ_[BHSD]")>;
+
+// Bitwise select
+def : InstRW<[V3Write_2c_1V], (instregex "^(BSL|BSL1N|BSL2N|NBSL)_ZZZZ")>;
+
+// Count/reverse bits
+def : InstRW<[V3Write_2c_1V], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]")>;
+
+// Broadcast logical bitmask immediate to vector
+def : InstRW<[V3Write_2c_1V], (instrs DUPM_ZI)>;
+
+// Compare and set flags
+def : InstRW<[V3Write_2or3c_1V0],
+ (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]",
+ "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]")>;
+
+// Complex add
+def : InstRW<[V3Write_2c_1V], (instregex "^(SQ)?CADD_ZZI_[BHSD]")>;
+
+// Complex dot product 8-bit element
+def : InstRW<[V3Wr_ZDOTB, V3Rd_ZDOTB], (instrs CDOT_ZZZ_S, CDOT_ZZZI_S)>;
+
+// Complex dot product 16-bit element
+def : InstRW<[V3Wr_ZDOTH, V3Rd_ZDOTH], (instrs CDOT_ZZZ_D, CDOT_ZZZI_D)>;
+
+// Complex multiply-add B, H, S element size
+def : InstRW<[V3Wr_ZCMABHS, V3Rd_ZCMABHS], (instregex "^CMLA_ZZZ_[BHS]",
+ "^CMLA_ZZZI_[HS]")>;
+
+// Complex multiply-add D element size
+def : InstRW<[V3Wr_ZCMAD, V3Rd_ZCMAD], (instrs CMLA_ZZZ_D)>;
+
+// Conditional extract operations, scalar form
+def : InstRW<[V3Write_8c_1M0_1V01], (instregex "^CLAST[AB]_RPZ_[BHSD]")>;
+
+// Conditional extract operations, SIMD&FP scalar and vector forms
+def : InstRW<[V3Write_3c_1V1], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]",
+ "^COMPACT_ZPZ_[SD]",
+ "^SPLICE_ZPZZ?_[BHSD]")>;
+
+// Convert to floating point, 64b to float or convert to double
+def : InstRW<[V3Write_3c_1V02], (instregex "^[SU]CVTF_ZPmZ_Dto[HSD]",
+ "^[SU]CVTF_ZPmZ_StoD")>;
+
+// Convert to floating point, 32b to single or half
+def : InstRW<[V3Write_4c_2V02], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]")>;
+
+// Convert to floating point, 16b to half
+def : InstRW<[V3Write_6c_4V02], (instregex "^[SU]CVTF_ZPmZ_HtoH")>;
+
+// Copy, scalar
+def : InstRW<[V3Write_5c_1M0_1V], (instregex "^CPY_ZPmR_[BHSD]")>;
+
+// Copy, scalar SIMD&FP or imm
+def : InstRW<[V3Write_2c_1V], (instregex "^CPY_ZPm[IV]_[BHSD]",
+ "^CPY_ZPzI_[BHSD]")>;
+
+// Divides, 32 bit
+def : InstRW<[V3Write_12c_1V0], (instregex "^[SU]DIVR?_ZPmZ_S",
+ "^[SU]DIV_ZPZZ_S")>;
+
+// Divides, 64 bit
+def : InstRW<[V3Write_20c_1V0], (instregex "^[SU]DIVR?_ZPmZ_D",
+ "^[SU]DIV_ZPZZ_D")>;
+
+// Dot product, 8 bit
+def : InstRW<[V3Wr_ZDOTB, V3Rd_ZDOTB], (instregex "^[SU]DOT_ZZZI?_BtoS")>;
+
+// Dot product, 8 bit, using signed and unsigned integers
+def : InstRW<[V3Wr_ZDOTB, V3Rd_ZDOTB], (instrs SUDOT_ZZZI, USDOT_ZZZI, USDOT_ZZZ)>;
+
+// Dot product, 16 bit
+def : InstRW<[V3Wr_ZDOTH, V3Rd_ZDOTH], (instregex "^[SU]DOT_ZZZI?_HtoD")>;
+
+// Duplicate, immediate and indexed form
+def : InstRW<[V3Write_2c_1V], (instregex "^DUP_ZI_[BHSD]",
+ "^DUP_ZZI_[BHSDQ]")>;
+
+// Duplicate, scalar form
+def : InstRW<[V3Write_3c_1M0], (instregex "^DUP_ZR_[BHSD]")>;
+
+// Extend, sign or zero
+def : InstRW<[V3Write_2c_1V], (instregex "^[SU]XTB_ZPmZ_[HSD]",
+ "^[SU]XTH_ZPmZ_[SD]",
+ "^[SU]XTW_ZPmZ_[D]")>;
+
+// Extract
+def : InstRW<[V3Write_2c_1V], (instrs EXT_ZZI, EXT_ZZI_CONSTRUCTIVE, EXT_ZZI_B)>;
+
+// Extract narrow saturating
+def : InstRW<[V3Write_4c_1V], (instregex "^[SU]QXTN[BT]_ZZ_[BHS]",
+ "^SQXTUN[BT]_ZZ_[BHS]")>;
+
+// Extract operation, SIMD and FP scalar form
+def : InstRW<[V3Write_3c_1V1], (instregex "^LAST[AB]_VPZ_[BHSD]")>;
+
+// Extract operation, scalar
+def : InstRW<[V3Write_6c_1V1_1M0], (instregex "^LAST[AB]_RPZ_[BHSD]")>;
+
+// Histogram operations
+def : InstRW<[V3Write_2c_1V], (instregex "^HISTCNT_ZPzZZ_[SD]",
+ "^HISTSEG_ZZZ")>;
+
+// Horizontal operations, B, H, S form, immediate operands only
+def : InstRW<[V3Write_4c_1V02], (instregex "^INDEX_II_[BHS]")>;
+
+// Horizontal operations, B, H, S form, scalar, immediate operands/ scalar
+// operands only / immediate, scalar operands
+def : InstRW<[V3Write_7c_1M0_1V02], (instregex "^INDEX_(IR|RI|RR)_[BHS]")>;
+
+// Horizontal operations, D form, immediate operands only
+def : InstRW<[V3Write_5c_2V02], (instrs INDEX_II_D)>;
+
+// Horizontal operations, D form, scalar, immediate operands / scalar operands
+// only / immediate, scalar operands
+def : InstRW<[V3Write_8c_2M0_2V02], (instregex "^INDEX_(IR|RI|RR)_D")>;
+
+// Insert operation, SIMD and FP scalar form
+def : InstRW<[V3Write_2c_1V], (instregex "^INSR_ZV_[BHSD]")>;
+
+// Insert operation, scalar
+def : InstRW<[V3Write_5c_1V1_1M0], (instregex "^INSR_ZR_[BHSD]")>;
+
+// Logical
+// NOT_ZPmZ is matched by the (ZPmZ|ZPZZ) alternation below, so it needs no
+// separate pattern.
+def : InstRW<[V3Write_2c_1V],
+             (instregex "^(AND|EOR|ORR)_ZI",
+                        "^(AND|BIC|EOR|ORR)_ZZZ",
+                        "^EOR(BT|TB)_ZZZ_[BHSD]",
+                        "^(AND|BIC|EOR|NOT|ORR)_(ZPmZ|ZPZZ)_[BHSD]")>;
+
+// Max/min, basic and pairwise
+def : InstRW<[V3Write_2c_1V], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]",
+ "^[SU](MAX|MIN)P?_ZPmZ_[BHSD]",
+ "^[SU](MAX|MIN)_ZPZZ_[BHSD]")>;
+
+// Matching operations
+// FIXME: SOG p. 44, n. 5: If the consuming instruction has a flag source, the
+// latency for this instruction is 4 cycles.
+def : InstRW<[V3Write_2or3c_1V0_1M], (instregex "^N?MATCH_PPzZZ_[BH]")>;
+
+// Matrix multiply-accumulate
+def : InstRW<[V3Wr_ZMMA, V3Rd_ZMMA], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>;
+
+// Move prefix
+def : InstRW<[V3Write_2c_1V], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]",
+ "^MOVPRFX_ZZ")>;
+
+// Multiply, B, H, S element size
+def : InstRW<[V3Write_4c_1V02], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]",
+ "^MUL_ZPZZ_[BHS]",
+ "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]",
+ "^[SU]MULH_ZPZZ_[BHS]")>;
+
+// Multiply, D element size
+def : InstRW<[V3Write_5c_2V02], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D",
+ "^MUL_ZPZZ_D",
+ "^[SU]MULH_(ZPmZ|ZZZ)_D",
+ "^[SU]MULH_ZPZZ_D")>;
+
+// Multiply long
+def : InstRW<[V3Write_4c_1V02], (instregex "^[SU]MULL[BT]_ZZZI_[SD]",
+ "^[SU]MULL[BT]_ZZZ_[HSD]")>;
+
+// Multiply accumulate, B, H, S element size
+def : InstRW<[V3Wr_ZMABHS, V3Rd_ZMABHS],
+ (instregex "^ML[AS]_ZZZI_[HS]", "^ML[AS]_ZPZZZ_[BHS]")>;
+def : InstRW<[V3Wr_ZMABHS, ReadDefault, V3Rd_ZMABHS],
+ (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]")>;
+
+// Multiply accumulate, D element size
+def : InstRW<[V3Wr_ZMAD, V3Rd_ZMAD],
+ (instregex "^ML[AS]_ZZZI_D", "^ML[AS]_ZPZZZ_D")>;
+def : InstRW<[V3Wr_ZMAD, ReadDefault, V3Rd_ZMAD],
+ (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_D")>;
+
+// Multiply accumulate long
+def : InstRW<[V3Wr_ZMAL, V3Rd_ZMAL], (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]",
+ "^[SU]ML[AS]L[BT]_ZZZI_[SD]")>;
+
+// Multiply accumulate saturating doubling long regular
+def : InstRW<[V3Wr_ZMASQL, V3Rd_ZMASQ],
+ (instregex "^SQDML[AS]L(B|T|BT)_ZZZ_[HSD]",
+ "^SQDML[AS]L[BT]_ZZZI_[SD]")>;
+
+// Multiply saturating doubling high, B, H, S element size
+def : InstRW<[V3Write_4c_1V02], (instregex "^SQDMULH_ZZZ_[BHS]",
+ "^SQDMULH_ZZZI_[HS]")>;
+
+// Multiply saturating doubling high, D element size
+def : InstRW<[V3Write_5c_2V02], (instrs SQDMULH_ZZZ_D, SQDMULH_ZZZI_D)>;
+
+// Multiply saturating doubling long
+def : InstRW<[V3Write_4c_1V02], (instregex "^SQDMULL[BT]_ZZZ_[HSD]",
+ "^SQDMULL[BT]_ZZZI_[SD]")>;
+
+// Multiply saturating rounding doubling regular/complex accumulate, B, H, S
+// element size
+def : InstRW<[V3Wr_ZMASQBHS, V3Rd_ZMASQ], (instregex "^SQRDML[AS]H_ZZZ_[BHS]",
+ "^SQRDCMLAH_ZZZ_[BHS]",
+ "^SQRDML[AS]H_ZZZI_[HS]",
+ "^SQRDCMLAH_ZZZI_[HS]")>;
+
+// Multiply saturating rounding doubling regular/complex accumulate, D element
+// size
+def : InstRW<[V3Wr_ZMASQD, V3Rd_ZMASQ], (instregex "^SQRDML[AS]H_ZZZI?_D",
+ "^SQRDCMLAH_ZZZ_D")>;
+
+// Multiply saturating rounding doubling regular/complex, B, H, S element size
+def : InstRW<[V3Write_4c_1V02], (instregex "^SQRDMULH_ZZZ_[BHS]",
+ "^SQRDMULH_ZZZI_[HS]")>;
+
+// Multiply saturating rounding doubling regular/complex, D element size
+def : InstRW<[V3Write_5c_2V02], (instregex "^SQRDMULH_ZZZI?_D")>;
+
+// Multiply/multiply long, (8x8) polynomial
+def : InstRW<[V3Write_2c_1V], (instregex "^PMUL_ZZZ_B",
+ "^PMULL[BT]_ZZZ_[HDQ]")>;
+
+// Predicate counting vector
+def : InstRW<[V3Write_2c_1V], (instregex "^([SU]Q)?(DEC|INC)[HWD]_ZPiI")>;
+
+// Reciprocal estimate
+def : InstRW<[V3Write_4c_2V02], (instregex "^URECPE_ZPmZ_S", "^URSQRTE_ZPmZ_S")>;
+
+// Reduction, arithmetic, B form
+def : InstRW<[V3Write_9c_2V_4V13], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>;
+
+// Reduction, arithmetic, H form
+def : InstRW<[V3Write_8c_2V_2V13], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_H")>;
+
+// Reduction, arithmetic, S form
+def : InstRW<[V3Write_6c_2V_2V13], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_S")>;
+
+// Reduction, arithmetic, D form
+def : InstRW<[V3Write_4c_2V], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>;
+
+// Reduction, logical
+def : InstRW<[V3Write_6c_1V_1V13], (instregex "^(AND|EOR|OR)V_VPZ_[BHSD]")>;
+
+// Reverse, vector
+def : InstRW<[V3Write_2c_1V], (instregex "^REV_ZZ_[BHSD]",
+ "^REVB_ZPmZ_[HSD]",
+ "^REVH_ZPmZ_[SD]",
+ "^REVW_ZPmZ_D")>;
+
+// Select, vector form
+def : InstRW<[V3Write_2c_1V], (instregex "^SEL_ZPZZ_[BHSD]")>;
+
+// Table lookup
+def : InstRW<[V3Write_2c_1V], (instregex "^TBL_ZZZZ?_[BHSD]")>;
+
+// Table lookup extension
+def : InstRW<[V3Write_2c_1V], (instregex "^TBX_ZZZ_[BHSD]")>;
+
+// Transpose, vector form
+def : InstRW<[V3Write_2c_1V], (instregex "^TRN[12]_ZZZ_[BHSDQ]")>;
+
+// Unpack and extend
+def : InstRW<[V3Write_2c_1V], (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]")>;
+
+// Zip/unzip
+def : InstRW<[V3Write_2c_1V], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]")>;
+
+// §3.26 SVE floating-point instructions
+// -----------------------------------------------------------------------------
+
+// Floating point absolute value/difference
+// "^FAB[SD]_ZPmZ_" matches both the FABS_ZPmZ and FABD_ZPmZ opcodes, so FABS
+// needs no separate pattern.
+def : InstRW<[V3Write_2c_1V],
+             (instregex "^FAB[SD]_ZPmZ_[HSD]", "^FABD_ZPZZ_[HSD]")>;
+
+// Floating point arithmetic
+def : InstRW<[V3Write_2c_1V], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]",
+ "^F(ADD|SUB)_ZPZ[IZ]_[HSD]",
+ "^FADDP_ZPmZZ_[HSD]",
+ "^FNEG_ZPmZ_[HSD]",
+ "^FSUBR_ZPm[IZ]_[HSD]",
+ "^FSUBR_(ZPZI|ZPZZ)_[HSD]")>;
+
+// Floating point associative add, F16
+def : InstRW<[V3Write_10c_1V1_9rc], (instrs FADDA_VPZ_H)>;
+
+// Floating point associative add, F32
+def : InstRW<[V3Write_6c_1V1_5rc], (instrs FADDA_VPZ_S)>;
+
+// Floating point associative add, F64
+def : InstRW<[V3Write_4c_1V], (instrs FADDA_VPZ_D)>;
+
+// Floating point compare
+def : InstRW<[V3Write_2c_1V0], (instregex "^FACG[ET]_PPzZZ_[HSD]",
+ "^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]",
+ "^FCM(LE|LT)_PPzZ0_[HSD]",
+ "^FCMUO_PPzZZ_[HSD]")>;
+
+// Floating point complex add
+def : InstRW<[V3Write_3c_1V], (instregex "^FCADD_ZPmZ_[HSD]")>;
+
+// Floating point complex multiply add
+def : InstRW<[V3Wr_ZFCMA, ReadDefault, V3Rd_ZFCMA], (instregex "^FCMLA_ZPmZZ_[HSD]")>;
+def : InstRW<[V3Wr_ZFCMA, V3Rd_ZFCMA], (instregex "^FCMLA_ZZZI_[HS]")>;
+
+// Floating point convert, long or narrow (F16 to F32 or F32 to F16)
+def : InstRW<[V3Write_4c_2V02], (instregex "^FCVT_ZPmZ_(HtoS|StoH)",
+ "^FCVTLT_ZPmZ_HtoS",
+ "^FCVTNT_ZPmZ_StoH")>;
+
+// Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32
+// or F64 to F16)
+def : InstRW<[V3Write_3c_1V02], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)",
+ "^FCVTLT_ZPmZ_StoD",
+ "^FCVTNT_ZPmZ_DtoS")>;
+
+// Floating point convert, round to odd
+def : InstRW<[V3Write_3c_1V02], (instrs FCVTX_ZPmZ_DtoS, FCVTXNT_ZPmZ_DtoS)>;
+
+// Floating point base2 log, F16
+def : InstRW<[V3Write_6c_4V02], (instregex "^FLOGB_(ZPmZ|ZPZZ)_H")>;
+
+// Floating point base2 log, F32
+def : InstRW<[V3Write_4c_2V02], (instregex "^FLOGB_(ZPmZ|ZPZZ)_S")>;
+
+// Floating point base2 log, F64
+def : InstRW<[V3Write_3c_1V02], (instregex "^FLOGB_(ZPmZ|ZPZZ)_D")>;
+
+// Floating point convert to integer, F16
+def : InstRW<[V3Write_6c_4V02], (instregex "^FCVTZ[SU]_ZPmZ_HtoH")>;
+
+// Floating point convert to integer, F32
+def : InstRW<[V3Write_4c_2V02], (instregex "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)")>;
+
+// Floating point convert to integer, F64
+def : InstRW<[V3Write_3c_1V02],
+ (instregex "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)")>;
+
+// Floating point copy
+def : InstRW<[V3Write_2c_1V], (instregex "^FCPY_ZPmI_[HSD]",
+ "^FDUP_ZI_[HSD]")>;
+
+// Floating point divide, F16
+def : InstRW<[V3Write_13c_1V1_8rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_H")>;
+
+// Floating point divide, F32
+def : InstRW<[V3Write_11c_1V1_4rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_S")>;
+
+// Floating point divide, F64
+def : InstRW<[V3Write_14c_1V1_2rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_D")>;
+
+// Floating point min/max pairwise
+def : InstRW<[V3Write_2c_1V], (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]")>;
+
+// Floating point min/max
+def : InstRW<[V3Write_2c_1V], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]",
+ "^F(MAX|MIN)(NM)?_ZPZ[IZ]_[HSD]")>;
+
+// Floating point multiply
+def : InstRW<[V3Write_3c_1V], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]",
+ "^FMULX_ZPZZ_[HSD]",
+ "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]",
+ "^FMUL_ZPZ[IZ]_[HSD]")>;
+
+// Floating point multiply accumulate
+def : InstRW<[V3Wr_ZFMA, ReadDefault, V3Rd_ZFMA],
+ (instregex "^FN?ML[AS]_ZPmZZ_[HSD]",
+ "^FN?(MAD|MSB)_ZPmZZ_[HSD]")>;
+def : InstRW<[V3Wr_ZFMA, V3Rd_ZFMA],
+ (instregex "^FML[AS]_ZZZI_[HSD]",
+ "^FN?ML[AS]_ZPZZZ_[HSD]")>;
+
+// Floating point multiply add/sub accumulate long
+def : InstRW<[V3Wr_ZFMAL, V3Rd_ZFMAL], (instregex "^FML[AS]L[BT]_ZZZI?_SHH")>;
+
+// Floating point reciprocal estimate, F16
+def : InstRW<[V3Write_6c_4V02], (instregex "^FR(ECP|SQRT)E_ZZ_H", "^FRECPX_ZPmZ_H")>;
+
+// Floating point reciprocal estimate, F32
+def : InstRW<[V3Write_4c_2V02], (instregex "^FR(ECP|SQRT)E_ZZ_S", "^FRECPX_ZPmZ_S")>;
+
+// Floating point reciprocal estimate, F64
+def : InstRW<[V3Write_3c_1V02], (instregex "^FR(ECP|SQRT)E_ZZ_D", "^FRECPX_ZPmZ_D")>;
+
+// Floating point reciprocal step
+def : InstRW<[V3Write_4c_1V], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]")>;
+
+// Floating point reduction, F16
+def : InstRW<[V3Write_8c_4V],
+ (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_H")>;
+
+// Floating point reduction, F32
+def : InstRW<[V3Write_6c_3V],
+ (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_S")>;
+
+// Floating point reduction, F64
+def : InstRW<[V3Write_4c_2V],
+ (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_D")>;
+
+// Floating point round to integral, F16
+def : InstRW<[V3Write_6c_4V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H")>;
+
+// Floating point round to integral, F32
+def : InstRW<[V3Write_4c_2V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S")>;
+
+// Floating point round to integral, F64
+def : InstRW<[V3Write_3c_1V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D")>;
+
+// Floating point square root, F16
+def : InstRW<[V3Write_13c_1V1_8rc], (instregex "^FSQRT_ZPmZ_H")>;
+
+// Floating point square root, F32
+def : InstRW<[V3Write_11c_1V1_4rc], (instregex "^FSQRT_ZPmZ_S")>;
+
+// Floating point square root, F64
+def : InstRW<[V3Write_14c_1V1_2rc], (instregex "^FSQRT_ZPmZ_D")>;
+
+// Floating point trigonometric exponentiation
+def : InstRW<[V3Write_3c_1V1], (instregex "^FEXPA_ZZ_[HSD]")>;
+
+// Floating point trigonometric multiply add
+def : InstRW<[V3Write_4c_1V], (instregex "^FTMAD_ZZI_[HSD]")>;
+
+// Floating point trigonometric, miscellaneous
+def : InstRW<[V3Write_3c_1V], (instregex "^FTS(MUL|SEL)_ZZZ_[HSD]")>;
+
+// §3.27 SVE BFloat16 (BF16) instructions
+// -----------------------------------------------------------------------------
+
+// Convert, F32 to BF16
+def : InstRW<[V3Write_4c_1V02], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>;
+
+// The three accumulating forms below each pair a write type (V3Wr_*) with a
+// matching read type (V3Rd_*) in the InstRW list.
+// NOTE(review): the pairing presumably models accumulator forwarding; the
+// V3Wr_*/V3Rd_* definitions are not in this part of the file - confirm there.
+
+// Dot product
+def : InstRW<[V3Wr_ZBFDOT, V3Rd_ZBFDOT], (instrs BFDOT_ZZI, BFDOT_ZZZ)>;
+
+// Matrix multiply accumulate
+def : InstRW<[V3Wr_ZBFMMA, V3Rd_ZBFMMA], (instrs BFMMLA_ZZZ_HtoS)>;
+
+// Multiply accumulate long
+// The optional trailing "I" lets one pattern cover both the _ZZZ and _ZZZI
+// opcode names.
+def : InstRW<[V3Wr_ZBFMAL, V3Rd_ZBFMAL], (instregex "^BFMLAL[BT]_ZZZI?")>;
+
+// §3.28 SVE Load instructions
+// -----------------------------------------------------------------------------
+//
+// In this section the "_IMM" opcode suffix marks the scalar + imm addressing
+// form, and the same opcode name without it is the scalar + scalar form. The
+// "S?" in the patterns makes one regex cover both the plain and the
+// sign-extending (LD1SB/LD1SH/LD1SW style) opcode names.
+
+// Load vector
+def : InstRW<[V3Write_6c_1L], (instrs LDR_ZXI)>;
+
+// Load predicate
+def : InstRW<[V3Write_6c_1L_1M], (instrs LDR_PXI)>;
+
+// Contiguous load, scalar + imm
+def : InstRW<[V3Write_6c_1L], (instregex "^LD1[BHWD]_IMM$",
+ "^LD1S?B_[HSD]_IMM$",
+ "^LD1S?H_[SD]_IMM$",
+ "^LD1S?W_D_IMM$" )>;
+// Contiguous load, scalar + scalar
+def : InstRW<[V3Write_6c_1L], (instregex "^LD1[BHWD]$",
+ "^LD1S?B_[HSD]$",
+ "^LD1S?H_[SD]$",
+ "^LD1S?W_D$" )>;
+
+// Contiguous load broadcast, scalar + imm
+def : InstRW<[V3Write_6c_1L], (instregex "^LD1R[BHWD]_IMM$",
+ "^LD1RS?B_[HSD]_IMM$",
+ "^LD1RS?H_[SD]_IMM$",
+ "^LD1RW_D_IMM$",
+ "^LD1RSW_IMM$",
+ "^LD1RQ_[BHWD]_IMM$")>;
+
+// Contiguous load broadcast, scalar + scalar
+def : InstRW<[V3Write_6c_1L], (instregex "^LD1RQ_[BHWD]$")>;
+
+// Non temporal load, scalar + imm
+// Non temporal load, scalar + scalar
+// (one pattern: the final [IR] selects the _ZRI or the _ZRR opcode name)
+def : InstRW<[V3Write_6c_1L], (instregex "^LDNT1[BHWD]_ZR[IR]$")>;
+
+// Non temporal gather load, vector + scalar 32-bit element size
+def : InstRW<[V3Write_9c_2L_4V], (instregex "^LDNT1[BHW]_ZZR_S$",
+ "^LDNT1S[BH]_ZZR_S$")>;
+
+// Non temporal gather load, vector + scalar 64-bit element size
+def : InstRW<[V3Write_9c_2L_2V], (instregex "^LDNT1S?[BHW]_ZZR_D$")>;
+def : InstRW<[V3Write_9c_2L_2V], (instrs LDNT1D_ZZR_D)>;
+
+// Contiguous first faulting load, scalar + scalar
+def : InstRW<[V3Write_6c_1L_1I], (instregex "^LDFF1[BHWD]$",
+ "^LDFF1S?B_[HSD]$",
+ "^LDFF1S?H_[SD]$",
+ "^LDFF1S?W_D$")>;
+
+// Contiguous non faulting load, scalar + imm
+def : InstRW<[V3Write_6c_1L], (instregex "^LDNF1[BHWD]_IMM$",
+ "^LDNF1S?B_[HSD]_IMM$",
+ "^LDNF1S?H_[SD]_IMM$",
+ "^LDNF1S?W_D_IMM$")>;
+
+// For the structure loads below, each scalar + scalar entry uses a write type
+// whose name has extra "I" components and a higher latency than the
+// scalar + imm entry for the same structure count.
+
+// Contiguous Load two structures to two vectors, scalar + imm
+def : InstRW<[V3Write_8c_2L_2V], (instregex "^LD2[BHWD]_IMM$")>;
+
+// Contiguous Load two structures to two vectors, scalar + scalar
+def : InstRW<[V3Write_9c_2L_2V_2I], (instregex "^LD2[BHWD]$")>;
+
+// Contiguous Load three structures to three vectors, scalar + imm
+def : InstRW<[V3Write_9c_3L_3V], (instregex "^LD3[BHWD]_IMM$")>;
+
+// Contiguous Load three structures to three vectors, scalar + scalar
+def : InstRW<[V3Write_10c_3V_3L_3I], (instregex "^LD3[BHWD]$")>;
+
+// Contiguous Load four structures to four vectors, scalar + imm
+def : InstRW<[V3Write_9c_4L_8V], (instregex "^LD4[BHWD]_IMM$")>;
+
+// Contiguous Load four structures to four vectors, scalar + scalar
+def : InstRW<[V3Write_10c_4L_8V_4I], (instregex "^LD4[BHWD]$")>;
+
+// The gather patterns below all start with "GLD(FF)?1", so each entry covers
+// both the GLD1* and the GLDFF1* opcode names.
+
+// Gather load, vector + imm, 32-bit element size
+def : InstRW<[V3Write_9c_1L_4V], (instregex "^GLD(FF)?1S?[BH]_S_IMM$",
+ "^GLD(FF)?1W_IMM$")>;
+
+// Gather load, vector + imm, 64-bit element size
+def : InstRW<[V3Write_9c_1L_4V], (instregex "^GLD(FF)?1S?[BHW]_D_IMM$",
+ "^GLD(FF)?1D_IMM$")>;
+
+// Gather load, 32-bit scaled offset
+// NOTE(review): the second pattern has no trailing "$", unlike every other
+// gather pattern in this section - confirm that is intentional.
+def : InstRW<[V3Write_10c_1L_8V],
+ (instregex "^GLD(FF)?1S?H_S_[SU]XTW_SCALED$",
+ "^GLD(FF)?1W_[SU]XTW_SCALED")>;
+
+// Gather load, 64-bit scaled offset
+// NOTE: These instructions are not specified in the SOG.
+def : InstRW<[V3Write_10c_1L_4V],
+ (instregex "^GLD(FF)?1S?[HW]_D_([SU]XTW_)?SCALED$",
+ "^GLD(FF)?1D_([SU]XTW_)?SCALED$")>;
+
+// Gather load, 32-bit unpacked unscaled offset
+def : InstRW<[V3Write_9c_1L_4V], (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW$",
+ "^GLD(FF)?1W_[SU]XTW$")>;
+
+// Gather load, 64-bit unpacked unscaled offset
+// NOTE: These instructions are not specified in the SOG.
+def : InstRW<[V3Write_9c_1L_2V],
+ (instregex "^GLD(FF)?1S?[BHW]_D(_[SU]XTW)?$",
+ "^GLD(FF)?1D(_[SU]XTW)?$")>;
+
+// §3.29 SVE Store instructions
+// -----------------------------------------------------------------------------
+//
+// As in the load section, "_IMM" (or "_ZRI") names the scalar + imm form and
+// the bare (or "_ZRR") opcode name the scalar + scalar form.
+
+// Store from predicate reg
+def : InstRW<[V3Write_1c_1SA], (instrs STR_PXI)>;
+
+// Store from vector reg
+def : InstRW<[V3Write_2c_1SA_1V01], (instrs STR_ZXI)>;
+
+// Contiguous store, scalar + imm
+def : InstRW<[V3Write_2c_1SA_1V01], (instregex "^ST1[BHWD]_IMM$",
+ "^ST1B_[HSD]_IMM$",
+ "^ST1H_[SD]_IMM$",
+ "^ST1W_D_IMM$")>;
+
+// Contiguous store, scalar + scalar
+// For ST1, ST2 and STNT1 only the halfword (H) scalar + scalar opcodes are
+// mapped to a write type with an extra "I" component; B/W/D keep the same
+// write type as the scalar + imm form.
+def : InstRW<[V3Write_2c_1SA_1I_1V01], (instregex "^ST1H(_[SD])?$")>;
+def : InstRW<[V3Write_2c_1SA_1V01], (instregex "^ST1[BWD]$",
+ "^ST1B_[HSD]$",
+ "^ST1W_D$")>;
+
+// Contiguous store two structures from two vectors, scalar + imm
+def : InstRW<[V3Write_4c_1SA_1V01], (instregex "^ST2[BHWD]_IMM$")>;
+
+// Contiguous store two structures from two vectors, scalar + scalar
+def : InstRW<[V3Write_4c_2SA_2I_2V01], (instrs ST2H)>;
+def : InstRW<[V3Write_4c_2SA_2V01], (instregex "^ST2[BWD]$")>;
+
+// Contiguous store three structures from three vectors, scalar + imm
+def : InstRW<[V3Write_7c_9SA_9V01], (instregex "^ST3[BHWD]_IMM$")>;
+
+// Contiguous store three structures from three vectors, scalar + scalar
+// (ST3 and ST4 use the "I"-bearing write type for every element size.)
+def : InstRW<[V3Write_7c_9SA_9I_9V01], (instregex "^ST3[BHWD]$")>;
+
+// Contiguous store four structures from four vectors, scalar + imm
+def : InstRW<[V3Write_11c_18SA_18V01], (instregex "^ST4[BHWD]_IMM$")>;
+
+// Contiguous store four structures from four vectors, scalar + scalar
+def : InstRW<[V3Write_11c_18SA_18I_18V01], (instregex "^ST4[BHWD]$")>;
+
+// Non temporal store, scalar + imm
+def : InstRW<[V3Write_2c_1SA_1V01], (instregex "^STNT1[BHWD]_ZRI$")>;
+
+// Non temporal store, scalar + scalar
+def : InstRW<[V3Write_2c_1SA_1I_1V01], (instrs STNT1H_ZRR)>;
+def : InstRW<[V3Write_2c_1SA_1V01], (instregex "^STNT1[BWD]_ZRR$")>;
+
+// Scatter non temporal store, vector + scalar 32-bit element size
+// NOTE(review): this pattern and the 64-bit one below have no trailing "$",
+// unlike the other store patterns in this section - confirm that is intended.
+def : InstRW<[V3Write_4c_6SA_6V01], (instregex "^STNT1[BHW]_ZZR_S")>;
+
+// Scatter non temporal store, vector + scalar 64-bit element size
+def : InstRW<[V3Write_2c_3SA_3V01], (instregex "^STNT1[BHWD]_ZZR_D")>;
+
+// All scatter entries from here on use one of two write types:
+// V3Write_4c_6SA_6V01 for the 32-bit element forms (_S opcodes and SST1W_*),
+// V3Write_2c_3SA_3V01 for the 64-bit element forms (_D opcodes and SST1D_*).
+
+// Scatter store vector + imm 32-bit element size
+def : InstRW<[V3Write_4c_6SA_6V01], (instregex "^SST1[BH]_S_IMM$",
+ "^SST1W_IMM$")>;
+
+// Scatter store vector + imm 64-bit element size
+def : InstRW<[V3Write_2c_3SA_3V01], (instregex "^SST1[BHW]_D_IMM$",
+ "^SST1D_IMM$")>;
+
+// Scatter store, 32-bit scaled offset
+def : InstRW<[V3Write_4c_6SA_6V01],
+ (instregex "^SST1(H_S|W)_[SU]XTW_SCALED$")>;
+
+// Scatter store, 32-bit unpacked unscaled offset
+def : InstRW<[V3Write_2c_3SA_3V01], (instregex "^SST1[BHW]_D_[SU]XTW$",
+ "^SST1D_[SU]XTW$")>;
+
+// Scatter store, 32-bit unpacked scaled offset
+def : InstRW<[V3Write_2c_3SA_3V01], (instregex "^SST1[HW]_D_[SU]XTW_SCALED$",
+ "^SST1D_[SU]XTW_SCALED$")>;
+
+// Scatter store, 32-bit unscaled offset
+def : InstRW<[V3Write_4c_6SA_6V01], (instregex "^SST1[BH]_S_[SU]XTW$",
+ "^SST1W_[SU]XTW$")>;
+
+// Scatter store, 64-bit scaled offset
+def : InstRW<[V3Write_2c_3SA_3V01], (instregex "^SST1[HW]_D_SCALED$",
+ "^SST1D_SCALED$")>;
+
+// Scatter store, 64-bit unscaled offset
+def : InstRW<[V3Write_2c_3SA_3V01], (instregex "^SST1[BHW]_D$",
+ "^SST1D$")>;
+
+// §3.30 SVE Miscellaneous instructions
+// -----------------------------------------------------------------------------
+
+// Read first fault register, unpredicated
+def : InstRW<[V3Write_2c_1M0], (instrs RDFFR_P)>;
+
+// Read first fault register, predicated
+// NOTE(review): "3or4c" in the write-type name suggests a predicate-selected
+// latency; its definition is not in this part of the file - confirm there.
+def : InstRW<[V3Write_3or4c_1M0_1M], (instrs RDFFR_PPz)>;
+
+// Read first fault register and set flags
+def : InstRW<[V3Write_3or4c_1M0_1M], (instrs RDFFRS_PPz)>;
+
+// Set first fault register
+// Write to first fault register
+def : InstRW<[V3Write_2c_1M0], (instrs SETFFR, WRFFR)>;
+
+// Prefetch
+// NOTE: This is not specified in the SOG.
+// The pattern is deliberately short: it has no suffix and no trailing "$",
+// presumably so that every PRFB/PRFH/PRFW/PRFD opcode variant is covered.
+def : InstRW<[V3Write_4c_1L], (instregex "^PRF[BHWD]")>;
+
+// §3.31 SVE Cryptographic instructions
+// -----------------------------------------------------------------------------
+
+// Crypto AES ops
+// Covers AESD/AESE and, through the optional "I", both AESMC and AESIMC.
+def : InstRW<[V3Write_2c_1V], (instregex "^AES[DE]_ZZZ_B$",
+ "^AESI?MC_ZZ_B$")>;
+
+// Crypto SHA3 ops
+def : InstRW<[V3Write_2c_1V], (instregex "^(BCAX|EOR3)_ZZZZ$",
+ "^RAX1_ZZZ_D$",
+ "^XAR_ZZZI_[BHSD]$")>;
+
+// Crypto SM4 ops
+// Covers SM4E and, through the optional "KEY", SM4EKEY. Unlike the AES and
+// SHA3 entries, this one is tied to a single pipe (1V0) by its write type.
+def : InstRW<[V3Write_4c_1V0], (instregex "^SM4E(KEY)?_ZZZ_S$")>;
+
+}
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV3AE.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV3AE.td
new file mode 100644
index 0000000..0f1ec66
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV3AE.td
@@ -0,0 +1,2705 @@
+//=- AArch64SchedNeoverseV3AE.td - NeoverseV3AE Scheduling Defs --*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the scheduling model for the Arm Neoverse V3AE processors.
+// All information is taken from the V3AE Software Optimisation guide:
+//
+// https://developer.arm.com/documentation/109703/300/?lang=en
+//
+//===----------------------------------------------------------------------===//
+
+// Top-level machine model record for Neoverse V3AE. Several of the numeric
+// parameters are carried over from other cores rather than taken from V3AE
+// data; the trailing comment on each line says which.
+def NeoverseV3AEModel : SchedMachineModel {
+ let IssueWidth = 10; // Expect best value to be slightly higher than V2
+ let MicroOpBufferSize = 320; // Entries in micro-op re-order buffer. NOTE: Copied from Neoverse-V2
+ let LoadLatency = 4; // Optimistic load latency.
+ let MispredictPenalty = 10; // Extra cycles for mispredicted branch. NOTE: Copied from N2.
+ let LoopMicroOpBufferSize = 16; // NOTE: Copied from Cortex-A57.
+ // NOTE(review): CompleteModel = 1 presumably requires every instruction
+ // outside UnsupportedFeatures to have scheduling info - see TargetSchedule.td.
+ let CompleteModel = 1;
+
+ // Features left out of this model: everything listed in SMEUnsupported.F,
+ // plus SVE2p1, SVE-B16B16, CPA and CSSC.
+ list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F,
+ [HasSVE2p1, HasSVEB16B16,
+ HasCPA, HasCSSC]);
+}
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available on Neoverse V3AE.
+// Instructions are first fetched and then decoded into internal macro-ops
+// (MOPs). From there, the MOPs proceed through register renaming and dispatch
+// stages. A MOP can be split into two micro-ops further down the pipeline
+// after the decode stage. Once dispatched, micro-ops wait for their operands
+// and issue out-of-order to one of nineteen issue pipelines. Each issue
+// pipeline can accept one micro-op per cycle.
+
+let SchedModel = NeoverseV3AEModel in {
+
+// Define the (19) issue ports.
+// The total of 19 is B x3 + S0..S5 x6 + M0/M1 x2 + V0/V1 x2 + LS0 x1 +
+// L12 x2 + ST1 x1 + D x2; it is reached only if V3AEUnitFlg (4 units) is not
+// counted as an issue port.
+def V3AEUnitB : ProcResource<3>; // Branch 0/1/2
+def V3AEUnitS0 : ProcResource<1>; // Integer single-cycle 0
+def V3AEUnitS1 : ProcResource<1>; // Integer single-cycle 1
+def V3AEUnitS2 : ProcResource<1>; // Integer single-cycle 2
+def V3AEUnitS3 : ProcResource<1>; // Integer single-cycle 3
+def V3AEUnitS4 : ProcResource<1>; // Integer single-cycle 4
+def V3AEUnitS5 : ProcResource<1>; // Integer single-cycle 5
+def V3AEUnitM0 : ProcResource<1>; // Integer single/multicycle 0
+def V3AEUnitM1 : ProcResource<1>; // Integer single/multicycle 1
+def V3AEUnitV0 : ProcResource<1>; // FP/ASIMD 0
+def V3AEUnitV1 : ProcResource<1>; // FP/ASIMD 1
+def V3AEUnitLS0 : ProcResource<1>; // Load/Store 0
+def V3AEUnitL12 : ProcResource<2>; // Load 1/2
+def V3AEUnitST1 : ProcResource<1>; // Store 1
+def V3AEUnitD : ProcResource<2>; // Store data 0/1
+def V3AEUnitFlg : ProcResource<4>; // Flags
+
+// Resource groups built from the units above. The write types later in the
+// file mostly name these groups (I, M, L, SA, V) rather than single units.
+// LS0 is a member of both the L and the SA group.
+def V3AEUnitS : ProcResGroup<[V3AEUnitS0, V3AEUnitS1, V3AEUnitS2, V3AEUnitS3, V3AEUnitS4, V3AEUnitS5]>; // Integer single-cycle 0/1/2/3/4/5
+def V3AEUnitI : ProcResGroup<[V3AEUnitS0, V3AEUnitS1, V3AEUnitS2, V3AEUnitS3, V3AEUnitS4, V3AEUnitS5, V3AEUnitM0, V3AEUnitM1]>; // Integer single-cycle 0/1/2/3/4/5 and single/multicycle 0/1
+def V3AEUnitM : ProcResGroup<[V3AEUnitM0, V3AEUnitM1]>; // Integer single/multicycle 0/1
+def V3AEUnitLSA : ProcResGroup<[V3AEUnitLS0, V3AEUnitL12, V3AEUnitST1]>; // Supergroup of L+SA
+def V3AEUnitL : ProcResGroup<[V3AEUnitLS0, V3AEUnitL12]>; // Load/Store 0 and Load 1/2
+def V3AEUnitSA : ProcResGroup<[V3AEUnitLS0, V3AEUnitST1]>; // Load/Store 0 and Store 1
+def V3AEUnitV : ProcResGroup<[V3AEUnitV0, V3AEUnitV1]>; // FP/ASIMD 0/1
+
+// Define commonly used read types.
+
+// No forwarding is provided for these types.
+// (Each generic read below is given a ReadAdvance of 0 cycles.)
+def : ReadAdvance<ReadI, 0>;
+def : ReadAdvance<ReadISReg, 0>;
+def : ReadAdvance<ReadIEReg, 0>;
+def : ReadAdvance<ReadIM, 0>;
+def : ReadAdvance<ReadIMA, 0>;
+def : ReadAdvance<ReadID, 0>;
+def : ReadAdvance<ReadExtrHi, 0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadST, 0>;
+def : ReadAdvance<ReadVLD, 0>;
+
+// NOTE: Copied from N2.
+// These four generic writes are given an empty resource list, so only the
+// field set in the braces applies: WriteAtomic is marked Unsupported, the
+// other three just get a latency.
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+def : WriteRes<WriteBarrier, []> { let Latency = 1; }
+def : WriteRes<WriteHint, []> { let Latency = 1; }
+def : WriteRes<WriteLDHi, []> { let Latency = 4; }
+
+//===----------------------------------------------------------------------===//
+// Define customized scheduler read/write types specific to the Neoverse V3AE.
+
+//===----------------------------------------------------------------------===//
+
+// Naming scheme for the write types in this file:
+// V3AEWrite_<latency>c_<count><unit>[_<count><unit>...]
+// e.g. V3AEWrite_6c_1L has Latency = 6 and lists V3AEUnitL once. The unit
+// letters correspond to the V3AEUnit* resources and groups (B, I, M, M0, L,
+// SA, D, V, V0, V1, Flg).
+
+// Define generic 0 micro-op types
+def V3AEWrite_0c : SchedWriteRes<[]> { let Latency = 0; }
+
+// Define generic 1 micro-op types
+// None of these set NumMicroOps. The two *_1Flg entries list V3AEUnitFlg as a
+// second resource but are still kept in this one-micro-op section.
+
+def V3AEWrite_1c_1B : SchedWriteRes<[V3AEUnitB]> { let Latency = 1; }
+// NOTE(review): the "1F" in the next name is modelled on V3AEUnitI; the
+// resource definitions for this model contain no separate "F" unit or group.
+// Confirm this is intended.
+def V3AEWrite_1c_1F_1Flg : SchedWriteRes<[V3AEUnitI, V3AEUnitFlg]> { let Latency = 1; }
+def V3AEWrite_1c_1I : SchedWriteRes<[V3AEUnitI]> { let Latency = 1; }
+def V3AEWrite_1c_1M : SchedWriteRes<[V3AEUnitM]> { let Latency = 1; }
+def V3AEWrite_1c_1SA : SchedWriteRes<[V3AEUnitSA]> { let Latency = 1; }
+def V3AEWrite_2c_1M : SchedWriteRes<[V3AEUnitM]> { let Latency = 2; }
+def V3AEWrite_2c_1M_1Flg : SchedWriteRes<[V3AEUnitM, V3AEUnitFlg]> { let Latency = 2; }
+def V3AEWrite_3c_1M : SchedWriteRes<[V3AEUnitM]> { let Latency = 3; }
+def V3AEWrite_2c_1M0 : SchedWriteRes<[V3AEUnitM0]> { let Latency = 2; }
+def V3AEWrite_3c_1M0 : SchedWriteRes<[V3AEUnitM0]> { let Latency = 3; }
+def V3AEWrite_4c_1M0 : SchedWriteRes<[V3AEUnitM0]> { let Latency = 4; }
+// The entries that also set ReleaseAtCycles hold their unit for the stated
+// number of cycles; for the M0 ones that equals the latency.
+def V3AEWrite_12c_1M0 : SchedWriteRes<[V3AEUnitM0]> { let Latency = 12;
+ let ReleaseAtCycles = [12]; }
+def V3AEWrite_20c_1M0 : SchedWriteRes<[V3AEUnitM0]> { let Latency = 20;
+ let ReleaseAtCycles = [20]; }
+def V3AEWrite_4c_1L : SchedWriteRes<[V3AEUnitL]> { let Latency = 4; }
+def V3AEWrite_6c_1L : SchedWriteRes<[V3AEUnitL]> { let Latency = 6; }
+def V3AEWrite_2c_1V : SchedWriteRes<[V3AEUnitV]> { let Latency = 2; }
+def V3AEWrite_2c_1V0 : SchedWriteRes<[V3AEUnitV0]> { let Latency = 2; }
+def V3AEWrite_3c_1V : SchedWriteRes<[V3AEUnitV]> { let Latency = 3; }
+def V3AEWrite_4c_1V : SchedWriteRes<[V3AEUnitV]> { let Latency = 4; }
+def V3AEWrite_5c_1V : SchedWriteRes<[V3AEUnitV]> { let Latency = 5; }
+def V3AEWrite_6c_1V : SchedWriteRes<[V3AEUnitV]> { let Latency = 6; }
+def V3AEWrite_12c_1V : SchedWriteRes<[V3AEUnitV]> { let Latency = 12; }
+def V3AEWrite_3c_1V0 : SchedWriteRes<[V3AEUnitV0]> { let Latency = 3; }
+def V3AEWrite_4c_1V0 : SchedWriteRes<[V3AEUnitV0]> { let Latency = 4; }
+def V3AEWrite_9c_1V0 : SchedWriteRes<[V3AEUnitV0]> { let Latency = 9; }
+def V3AEWrite_10c_1V0 : SchedWriteRes<[V3AEUnitV0]> { let Latency = 10; }
+def V3AEWrite_8c_1V1 : SchedWriteRes<[V3AEUnitV1]> { let Latency = 8; }
+// Here the release cycle (11) is one less than the latency (12).
+def V3AEWrite_12c_1V0 : SchedWriteRes<[V3AEUnitV0]> { let Latency = 12;
+ let ReleaseAtCycles = [11]; }
+def V3AEWrite_13c_1V0 : SchedWriteRes<[V3AEUnitV0]> { let Latency = 13; }
+def V3AEWrite_15c_1V0 : SchedWriteRes<[V3AEUnitV0]> { let Latency = 15; }
+// V1 is released after 8 cycles although the result takes 13.
+def V3AEWrite_13c_1V1 : SchedWriteRes<[V3AEUnitV1]> { let Latency = 13;
+ let ReleaseAtCycles = [8]; }
+def V3AEWrite_16c_1V0 : SchedWriteRes<[V3AEUnitV0]> { let Latency = 16; }
+def V3AEWrite_20c_1V0 : SchedWriteRes<[V3AEUnitV0]> { let Latency = 20;
+ let ReleaseAtCycles = [20]; }
+def V3AEWrite_2c_1V1 : SchedWriteRes<[V3AEUnitV1]> { let Latency = 2; }
+def V3AEWrite_3c_1V1 : SchedWriteRes<[V3AEUnitV1]> { let Latency = 3; }
+def V3AEWrite_4c_1V1 : SchedWriteRes<[V3AEUnitV1]> { let Latency = 4; }
+def V3AEWrite_6c_1V1 : SchedWriteRes<[V3AEUnitV1]> { let Latency = 6; }
+def V3AEWrite_10c_1V1 : SchedWriteRes<[V3AEUnitV1]> { let Latency = 10; }
+def V3AEWrite_6c_1SA : SchedWriteRes<[V3AEUnitSA]> { let Latency = 6; }
+
+//===----------------------------------------------------------------------===//
+// Generic write types made of two micro-ops.
+//
+// Every record in this section has NumMicroOps = 2, so that field is set once
+// by the enclosing `let`; each def states only its two resources and latency.
+
+let NumMicroOps = 2 in {
+
+def V3AEWrite_1c_1B_1S : SchedWriteRes<[V3AEUnitB, V3AEUnitS]> { let Latency = 1; }
+def V3AEWrite_6c_1M0_1B : SchedWriteRes<[V3AEUnitM0, V3AEUnitB]> { let Latency = 6; }
+def V3AEWrite_9c_1M0_1L : SchedWriteRes<[V3AEUnitM0, V3AEUnitL]> { let Latency = 9; }
+def V3AEWrite_3c_1I_1M : SchedWriteRes<[V3AEUnitI, V3AEUnitM]> { let Latency = 3; }
+def V3AEWrite_1c_2M : SchedWriteRes<[V3AEUnitM, V3AEUnitM]> { let Latency = 1; }
+def V3AEWrite_3c_2M : SchedWriteRes<[V3AEUnitM, V3AEUnitM]> { let Latency = 3; }
+def V3AEWrite_4c_2M : SchedWriteRes<[V3AEUnitM, V3AEUnitM]> { let Latency = 4; }
+def V3AEWrite_5c_1L_1I : SchedWriteRes<[V3AEUnitL, V3AEUnitI]> { let Latency = 5; }
+def V3AEWrite_6c_1I_1L : SchedWriteRes<[V3AEUnitI, V3AEUnitL]> { let Latency = 6; }
+def V3AEWrite_7c_1I_1L : SchedWriteRes<[V3AEUnitI, V3AEUnitL]> { let Latency = 7; }
+def V3AEWrite_1c_1SA_1D : SchedWriteRes<[V3AEUnitSA, V3AEUnitD]> { let Latency = 1; }
+def V3AEWrite_5c_1M0_1V : SchedWriteRes<[V3AEUnitM0, V3AEUnitV]> { let Latency = 5; }
+def V3AEWrite_2c_1SA_1V : SchedWriteRes<[V3AEUnitSA, V3AEUnitV]> { let Latency = 2; }
+def V3AEWrite_2c_2V : SchedWriteRes<[V3AEUnitV, V3AEUnitV]> { let Latency = 2; }
+def V3AEWrite_5c_1V1_1V : SchedWriteRes<[V3AEUnitV1, V3AEUnitV]> { let Latency = 5; }
+def V3AEWrite_4c_2V0 : SchedWriteRes<[V3AEUnitV0, V3AEUnitV0]> { let Latency = 4; }
+def V3AEWrite_4c_2V : SchedWriteRes<[V3AEUnitV, V3AEUnitV]> { let Latency = 4; }
+def V3AEWrite_6c_2V : SchedWriteRes<[V3AEUnitV, V3AEUnitV]> { let Latency = 6; }
+def V3AEWrite_6c_2L : SchedWriteRes<[V3AEUnitL, V3AEUnitL]> { let Latency = 6; }
+def V3AEWrite_8c_1L_1V : SchedWriteRes<[V3AEUnitL, V3AEUnitV]> { let Latency = 8; }
+def V3AEWrite_4c_1SA_1V : SchedWriteRes<[V3AEUnitSA, V3AEUnitV]> { let Latency = 4; }
+def V3AEWrite_3c_1M0_1M : SchedWriteRes<[V3AEUnitM0, V3AEUnitM]> { let Latency = 3; }
+def V3AEWrite_4c_1M0_1M : SchedWriteRes<[V3AEUnitM0, V3AEUnitM]> { let Latency = 4; }
+def V3AEWrite_1c_1M0_1M : SchedWriteRes<[V3AEUnitM0, V3AEUnitM]> { let Latency = 1; }
+def V3AEWrite_2c_1M0_1M : SchedWriteRes<[V3AEUnitM0, V3AEUnitM]> { let Latency = 2; }
+def V3AEWrite_6c_2V1 : SchedWriteRes<[V3AEUnitV1, V3AEUnitV1]> { let Latency = 6; }
+def V3AEWrite_5c_2V0 : SchedWriteRes<[V3AEUnitV0, V3AEUnitV0]> { let Latency = 5; }
+def V3AEWrite_5c_1V1_1M0 : SchedWriteRes<[V3AEUnitV1, V3AEUnitM0]> { let Latency = 5; }
+def V3AEWrite_6c_1V1_1M0 : SchedWriteRes<[V3AEUnitV1, V3AEUnitM0]> { let Latency = 6; }
+def V3AEWrite_7c_1M0_1V0 : SchedWriteRes<[V3AEUnitM0, V3AEUnitV0]> { let Latency = 7; }
+def V3AEWrite_2c_1V0_1M : SchedWriteRes<[V3AEUnitV0, V3AEUnitM]> { let Latency = 2; }
+def V3AEWrite_3c_1V0_1M : SchedWriteRes<[V3AEUnitV0, V3AEUnitM]> { let Latency = 3; }
+def V3AEWrite_6c_1V_1V1 : SchedWriteRes<[V3AEUnitV, V3AEUnitV1]> { let Latency = 6; }
+def V3AEWrite_6c_1L_1M : SchedWriteRes<[V3AEUnitL, V3AEUnitM]> { let Latency = 6; }
+def V3AEWrite_6c_1L_1I : SchedWriteRes<[V3AEUnitL, V3AEUnitI]> { let Latency = 6; }
+def V3AEWrite_8c_1M0_1V : SchedWriteRes<[V3AEUnitM0, V3AEUnitV]> { let Latency = 8; }
+
+} // NumMicroOps = 2
+
+//===----------------------------------------------------------------------===//
+// Generic write types made of three micro-ops.
+//
+// NumMicroOps = 3 is common to the whole section and is set by the enclosing
+// `let`; each def states only its three resources and its latency.
+
+let NumMicroOps = 3 in {
+
+def V3AEWrite_1c_1SA_1D_1I : SchedWriteRes<[V3AEUnitSA, V3AEUnitD, V3AEUnitI]> { let Latency = 1; }
+def V3AEWrite_2c_1SA_1V_1I : SchedWriteRes<[V3AEUnitSA, V3AEUnitV, V3AEUnitI]> { let Latency = 2; }
+def V3AEWrite_2c_1SA_2V : SchedWriteRes<[V3AEUnitSA, V3AEUnitV, V3AEUnitV]> { let Latency = 2; }
+def V3AEWrite_4c_1SA_2V : SchedWriteRes<[V3AEUnitSA, V3AEUnitV, V3AEUnitV]> { let Latency = 4; }
+def V3AEWrite_9c_1L_2V : SchedWriteRes<[V3AEUnitL, V3AEUnitV, V3AEUnitV]> { let Latency = 9; }
+def V3AEWrite_4c_3V : SchedWriteRes<[V3AEUnitV, V3AEUnitV, V3AEUnitV]> { let Latency = 4; }
+def V3AEWrite_7c_1M_1M0_1V : SchedWriteRes<[V3AEUnitM, V3AEUnitM0, V3AEUnitV]> { let Latency = 7; }
+def V3AEWrite_2c_1SA_1I_1V : SchedWriteRes<[V3AEUnitSA, V3AEUnitI, V3AEUnitV]> { let Latency = 2; }
+def V3AEWrite_6c_3L : SchedWriteRes<[V3AEUnitL, V3AEUnitL, V3AEUnitL]> { let Latency = 6; }
+def V3AEWrite_6c_3V : SchedWriteRes<[V3AEUnitV, V3AEUnitV, V3AEUnitV]> { let Latency = 6; }
+def V3AEWrite_8c_1L_2V : SchedWriteRes<[V3AEUnitL, V3AEUnitV, V3AEUnitV]> { let Latency = 8; }
+
+} // NumMicroOps = 3
+
+//===----------------------------------------------------------------------===//
+// Generic write types made of four micro-ops.
+//
+// NumMicroOps = 4 is common to the whole section and is set by the enclosing
+// `let`; each def states only its four resources and its latency.
+
+let NumMicroOps = 4 in {
+
+def V3AEWrite_2c_1SA_2V_1I
+    : SchedWriteRes<[V3AEUnitSA, V3AEUnitV, V3AEUnitV, V3AEUnitI]> { let Latency = 2; }
+def V3AEWrite_5c_1I_3L
+    : SchedWriteRes<[V3AEUnitI, V3AEUnitL, V3AEUnitL, V3AEUnitL]> { let Latency = 5; }
+def V3AEWrite_6c_4V0
+    : SchedWriteRes<[V3AEUnitV0, V3AEUnitV0, V3AEUnitV0, V3AEUnitV0]> { let Latency = 6; }
+def V3AEWrite_8c_4V
+    : SchedWriteRes<[V3AEUnitV, V3AEUnitV, V3AEUnitV, V3AEUnitV]> { let Latency = 8; }
+def V3AEWrite_6c_2V_2V1
+    : SchedWriteRes<[V3AEUnitV, V3AEUnitV, V3AEUnitV1, V3AEUnitV1]> { let Latency = 6; }
+def V3AEWrite_6c_4V
+    : SchedWriteRes<[V3AEUnitV, V3AEUnitV, V3AEUnitV, V3AEUnitV]> { let Latency = 6; }
+def V3AEWrite_8c_2L_2V
+    : SchedWriteRes<[V3AEUnitL, V3AEUnitL, V3AEUnitV, V3AEUnitV]> { let Latency = 8; }
+def V3AEWrite_9c_2L_2V
+    : SchedWriteRes<[V3AEUnitL, V3AEUnitL, V3AEUnitV, V3AEUnitV]> { let Latency = 9; }
+def V3AEWrite_2c_2SA_2V
+    : SchedWriteRes<[V3AEUnitSA, V3AEUnitSA, V3AEUnitV, V3AEUnitV]> { let Latency = 2; }
+def V3AEWrite_4c_2SA_2V
+    : SchedWriteRes<[V3AEUnitSA, V3AEUnitSA, V3AEUnitV, V3AEUnitV]> { let Latency = 4; }
+def V3AEWrite_8c_2M0_2V0
+    : SchedWriteRes<[V3AEUnitM0, V3AEUnitM0, V3AEUnitV0, V3AEUnitV0]> { let Latency = 8; }
+def V3AEWrite_8c_2V_2V1
+    : SchedWriteRes<[V3AEUnitV, V3AEUnitV, V3AEUnitV1, V3AEUnitV1]> { let Latency = 8; }
+def V3AEWrite_4c_2M0_2M
+    : SchedWriteRes<[V3AEUnitM0, V3AEUnitM0, V3AEUnitM, V3AEUnitM]> { let Latency = 4; }
+def V3AEWrite_5c_2M0_2M
+    : SchedWriteRes<[V3AEUnitM0, V3AEUnitM0, V3AEUnitM, V3AEUnitM]> { let Latency = 5; }
+def V3AEWrite_6c_2I_2L
+    : SchedWriteRes<[V3AEUnitI, V3AEUnitI, V3AEUnitL, V3AEUnitL]> { let Latency = 6; }
+def V3AEWrite_7c_4L
+    : SchedWriteRes<[V3AEUnitL, V3AEUnitL, V3AEUnitL, V3AEUnitL]> { let Latency = 7; }
+def V3AEWrite_6c_1SA_3V
+    : SchedWriteRes<[V3AEUnitSA, V3AEUnitV, V3AEUnitV, V3AEUnitV]> { let Latency = 6; }
+
+} // NumMicroOps = 4
+
+//===----------------------------------------------------------------------===//
+// Generic write types made of five micro-ops.
+//
+// NumMicroOps = 5 is set once by the enclosing `let`. Resource lists are laid
+// out with one unit kind per line so the counts can be read off directly.
+
+let NumMicroOps = 5 in {
+
+def V3AEWrite_2c_1SA_2V_2I
+    : SchedWriteRes<[V3AEUnitSA,
+                     V3AEUnitV, V3AEUnitV,
+                     V3AEUnitI, V3AEUnitI]> { let Latency = 2; }
+
+def V3AEWrite_8c_2L_3V
+    : SchedWriteRes<[V3AEUnitL, V3AEUnitL,
+                     V3AEUnitV, V3AEUnitV, V3AEUnitV]> { let Latency = 8; }
+
+def V3AEWrite_9c_1L_4V
+    : SchedWriteRes<[V3AEUnitL,
+                     V3AEUnitV, V3AEUnitV, V3AEUnitV, V3AEUnitV]> { let Latency = 9; }
+
+def V3AEWrite_10c_1L_4V
+    : SchedWriteRes<[V3AEUnitL,
+                     V3AEUnitV, V3AEUnitV, V3AEUnitV, V3AEUnitV]> { let Latency = 10; }
+
+def V3AEWrite_6c_5V
+    : SchedWriteRes<[V3AEUnitV, V3AEUnitV, V3AEUnitV, V3AEUnitV, V3AEUnitV]> { let Latency = 6; }
+
+} // NumMicroOps = 5
+
+//===----------------------------------------------------------------------===//
+// Generic write types made of six micro-ops.
+//
+// NumMicroOps = 6 is set once by the enclosing `let`. Resource lists are laid
+// out with one unit kind per line so the counts can be read off directly.
+
+let NumMicroOps = 6 in {
+
+def V3AEWrite_8c_3L_3V
+    : SchedWriteRes<[V3AEUnitL, V3AEUnitL, V3AEUnitL,
+                     V3AEUnitV, V3AEUnitV, V3AEUnitV]> { let Latency = 8; }
+
+def V3AEWrite_9c_3L_3V
+    : SchedWriteRes<[V3AEUnitL, V3AEUnitL, V3AEUnitL,
+                     V3AEUnitV, V3AEUnitV, V3AEUnitV]> { let Latency = 9; }
+
+def V3AEWrite_9c_2L_4V
+    : SchedWriteRes<[V3AEUnitL, V3AEUnitL,
+                     V3AEUnitV, V3AEUnitV, V3AEUnitV, V3AEUnitV]> { let Latency = 9; }
+
+def V3AEWrite_9c_2L_2V_2I
+    : SchedWriteRes<[V3AEUnitL, V3AEUnitL,
+                     V3AEUnitV, V3AEUnitV,
+                     V3AEUnitI, V3AEUnitI]> { let Latency = 9; }
+
+def V3AEWrite_9c_2V_4V1
+    : SchedWriteRes<[V3AEUnitV, V3AEUnitV,
+                     V3AEUnitV1, V3AEUnitV1, V3AEUnitV1, V3AEUnitV1]> { let Latency = 9; }
+
+def V3AEWrite_2c_3SA_3V
+    : SchedWriteRes<[V3AEUnitSA, V3AEUnitSA, V3AEUnitSA,
+                     V3AEUnitV, V3AEUnitV, V3AEUnitV]> { let Latency = 2; }
+
+def V3AEWrite_4c_2SA_4V
+    : SchedWriteRes<[V3AEUnitSA, V3AEUnitSA,
+                     V3AEUnitV, V3AEUnitV, V3AEUnitV, V3AEUnitV]> { let Latency = 4; }
+
+def V3AEWrite_5c_2SA_4V
+    : SchedWriteRes<[V3AEUnitSA, V3AEUnitSA,
+                     V3AEUnitV, V3AEUnitV, V3AEUnitV, V3AEUnitV]> { let Latency = 5; }
+
+def V3AEWrite_4c_2SA_2I_2V
+    : SchedWriteRes<[V3AEUnitSA, V3AEUnitSA,
+                     V3AEUnitI, V3AEUnitI,
+                     V3AEUnitV, V3AEUnitV]> { let Latency = 4; }
+
+} // NumMicroOps = 6
+
+//===----------------------------------------------------------------------===//
+// Generic write types made of seven micro-ops.
+
+// Three uses of the L group plus four of the V group, result after 8 cycles.
+let NumMicroOps = 7, Latency = 8 in
+def V3AEWrite_8c_3L_4V
+    : SchedWriteRes<[V3AEUnitL, V3AEUnitL, V3AEUnitL,
+                     V3AEUnitV, V3AEUnitV, V3AEUnitV, V3AEUnitV]>;
+
+//===----------------------------------------------------------------------===//
+// Generic write types made of eight micro-ops.
+//
+// NumMicroOps = 8 is set once by the enclosing `let`. Resource lists are laid
+// out with one unit kind per line so the counts can be read off directly.
+
+let NumMicroOps = 8 in {
+
+def V3AEWrite_2c_4SA_4V
+    : SchedWriteRes<[V3AEUnitSA, V3AEUnitSA, V3AEUnitSA, V3AEUnitSA,
+                     V3AEUnitV, V3AEUnitV, V3AEUnitV, V3AEUnitV]> { let Latency = 2; }
+
+def V3AEWrite_4c_4SA_4V
+    : SchedWriteRes<[V3AEUnitSA, V3AEUnitSA, V3AEUnitSA, V3AEUnitSA,
+                     V3AEUnitV, V3AEUnitV, V3AEUnitV, V3AEUnitV]> { let Latency = 4; }
+
+def V3AEWrite_6c_2SA_6V
+    : SchedWriteRes<[V3AEUnitSA, V3AEUnitSA,
+                     V3AEUnitV, V3AEUnitV, V3AEUnitV,
+                     V3AEUnitV, V3AEUnitV, V3AEUnitV]> { let Latency = 6; }
+
+def V3AEWrite_8c_4L_4V
+    : SchedWriteRes<[V3AEUnitL, V3AEUnitL, V3AEUnitL, V3AEUnitL,
+                     V3AEUnitV, V3AEUnitV, V3AEUnitV, V3AEUnitV]> { let Latency = 8; }
+
+} // NumMicroOps = 8
+
+//===----------------------------------------------------------------------===//
+// Generic write types made of nine micro-ops.
+//
+// NumMicroOps = 9 is set once by the enclosing `let`. Resource lists are laid
+// out by unit kind so the counts can be read off directly.
+
+let NumMicroOps = 9 in {
+
+def V3AEWrite_6c_3SA_6V
+    : SchedWriteRes<[V3AEUnitSA, V3AEUnitSA, V3AEUnitSA,
+                     V3AEUnitV, V3AEUnitV, V3AEUnitV,
+                     V3AEUnitV, V3AEUnitV, V3AEUnitV]> { let Latency = 6; }
+
+def V3AEWrite_10c_1L_8V
+    : SchedWriteRes<[V3AEUnitL,
+                     V3AEUnitV, V3AEUnitV, V3AEUnitV, V3AEUnitV,
+                     V3AEUnitV, V3AEUnitV, V3AEUnitV, V3AEUnitV]> { let Latency = 10; }
+
+def V3AEWrite_10c_3V_3L_3I
+    : SchedWriteRes<[V3AEUnitV, V3AEUnitV, V3AEUnitV,
+                     V3AEUnitL, V3AEUnitL, V3AEUnitL,
+                     V3AEUnitI, V3AEUnitI, V3AEUnitI]> { let Latency = 10; }
+
+} // NumMicroOps = 9
+
+//===----------------------------------------------------------------------===//
+// Generic write types made of ten micro-ops.
+
+// Six uses of the L group plus four of the V group, result after 9 cycles.
+let NumMicroOps = 10, Latency = 9 in
+def V3AEWrite_9c_6L_4V
+    : SchedWriteRes<[V3AEUnitL, V3AEUnitL, V3AEUnitL,
+                     V3AEUnitL, V3AEUnitL, V3AEUnitL,
+                     V3AEUnitV, V3AEUnitV, V3AEUnitV, V3AEUnitV]>;
+
+//===----------------------------------------------------------------------===//
+// Generic write types made of twelve micro-ops.
+//
+// NumMicroOps = 12 is set once by the enclosing `let`. Each resource list is
+// four of one group (SA or L) followed by eight of the V group.
+
+let NumMicroOps = 12 in {
+
+def V3AEWrite_5c_4SA_8V
+    : SchedWriteRes<[V3AEUnitSA, V3AEUnitSA, V3AEUnitSA, V3AEUnitSA,
+                     V3AEUnitV, V3AEUnitV, V3AEUnitV, V3AEUnitV,
+                     V3AEUnitV, V3AEUnitV, V3AEUnitV, V3AEUnitV]> { let Latency = 5; }
+
+def V3AEWrite_9c_4L_8V
+    : SchedWriteRes<[V3AEUnitL, V3AEUnitL, V3AEUnitL, V3AEUnitL,
+                     V3AEUnitV, V3AEUnitV, V3AEUnitV, V3AEUnitV,
+                     V3AEUnitV, V3AEUnitV, V3AEUnitV, V3AEUnitV]> { let Latency = 9; }
+
+def V3AEWrite_10c_4L_8V
+    : SchedWriteRes<[V3AEUnitL, V3AEUnitL, V3AEUnitL, V3AEUnitL,
+                     V3AEUnitV, V3AEUnitV, V3AEUnitV, V3AEUnitV,
+                     V3AEUnitV, V3AEUnitV, V3AEUnitV, V3AEUnitV]> { let Latency = 10; }
+
+} // NumMicroOps = 12
+
+//===----------------------------------------------------------------------===//
+// Define generic 16 micro-op types
+//
+// The resource list length of each def matches its NumMicroOps (16).
+
+// 4 x SA followed by 12 x V; latency 7.
+def V3AEWrite_7c_4SA_12V : SchedWriteRes<[V3AEUnitSA, V3AEUnitSA, V3AEUnitSA,
+ V3AEUnitSA, V3AEUnitV, V3AEUnitV,
+ V3AEUnitV, V3AEUnitV, V3AEUnitV,
+ V3AEUnitV, V3AEUnitV, V3AEUnitV,
+ V3AEUnitV, V3AEUnitV, V3AEUnitV,
+ V3AEUnitV]> {
+ let Latency = 7;
+ let NumMicroOps = 16;
+}
+
+// 4 x L, 8 x V, then 4 x I; latency 10.
+def V3AEWrite_10c_4L_8V_4I : SchedWriteRes<[V3AEUnitL, V3AEUnitL, V3AEUnitL,
+ V3AEUnitL, V3AEUnitV, V3AEUnitV,
+ V3AEUnitV, V3AEUnitV, V3AEUnitV,
+ V3AEUnitV, V3AEUnitV, V3AEUnitV,
+ V3AEUnitI, V3AEUnitI, V3AEUnitI,
+ V3AEUnitI]> {
+ let Latency = 10;
+ let NumMicroOps = 16;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 18 micro-op types
+
+// 9 x SA (first three rows) followed by 9 x V (last three rows); latency 7.
+def V3AEWrite_7c_9SA_9V : SchedWriteRes<[V3AEUnitSA, V3AEUnitSA, V3AEUnitSA,
+ V3AEUnitSA, V3AEUnitSA, V3AEUnitSA,
+ V3AEUnitSA, V3AEUnitSA, V3AEUnitSA,
+ V3AEUnitV, V3AEUnitV, V3AEUnitV,
+ V3AEUnitV, V3AEUnitV, V3AEUnitV,
+ V3AEUnitV, V3AEUnitV, V3AEUnitV]> {
+ let Latency = 7;
+ let NumMicroOps = 18;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 27 micro-op types
+
+// Three rows each of SA, I and V, three units per row: 9 + 9 + 9 = 27.
+// Same latency as V3AEWrite_7c_9SA_9V, with nine I-group uses added.
+def V3AEWrite_7c_9SA_9I_9V : SchedWriteRes<[V3AEUnitSA, V3AEUnitSA, V3AEUnitSA,
+ V3AEUnitSA, V3AEUnitSA, V3AEUnitSA,
+ V3AEUnitSA, V3AEUnitSA, V3AEUnitSA,
+ V3AEUnitI, V3AEUnitI, V3AEUnitI,
+ V3AEUnitI, V3AEUnitI, V3AEUnitI,
+ V3AEUnitI, V3AEUnitI, V3AEUnitI,
+ V3AEUnitV, V3AEUnitV, V3AEUnitV,
+ V3AEUnitV, V3AEUnitV, V3AEUnitV,
+ V3AEUnitV, V3AEUnitV, V3AEUnitV]> {
+ let Latency = 7;
+ let NumMicroOps = 27;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 36 micro-op types
+
+// Six rows of three SA followed by six rows of three V: 18 + 18 = 36.
+def V3AEWrite_11c_18SA_18V : SchedWriteRes<[V3AEUnitSA, V3AEUnitSA, V3AEUnitSA,
+ V3AEUnitSA, V3AEUnitSA, V3AEUnitSA,
+ V3AEUnitSA, V3AEUnitSA, V3AEUnitSA,
+ V3AEUnitSA, V3AEUnitSA, V3AEUnitSA,
+ V3AEUnitSA, V3AEUnitSA, V3AEUnitSA,
+ V3AEUnitSA, V3AEUnitSA, V3AEUnitSA,
+ V3AEUnitV, V3AEUnitV, V3AEUnitV,
+ V3AEUnitV, V3AEUnitV, V3AEUnitV,
+ V3AEUnitV, V3AEUnitV, V3AEUnitV,
+ V3AEUnitV, V3AEUnitV, V3AEUnitV,
+ V3AEUnitV, V3AEUnitV, V3AEUnitV,
+ V3AEUnitV, V3AEUnitV, V3AEUnitV]> {
+ let Latency = 11;
+ let NumMicroOps = 36;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 54 micro-op types
+
+// 54 micro-op write with latency 11. The resource list holds eighteen entries
+// each of V3AEUnitSA (nine rows of two), V3AEUnitI (six rows of three) and
+// V3AEUnitV (five rows of three, then two, then one).
+def V3AEWrite_11c_18SA_18I_18V : SchedWriteRes<[V3AEUnitSA, V3AEUnitSA,
+ V3AEUnitSA, V3AEUnitSA,
+ V3AEUnitSA, V3AEUnitSA,
+ V3AEUnitSA, V3AEUnitSA,
+ V3AEUnitSA, V3AEUnitSA,
+ V3AEUnitSA, V3AEUnitSA,
+ V3AEUnitSA, V3AEUnitSA,
+ V3AEUnitSA, V3AEUnitSA,
+ V3AEUnitSA, V3AEUnitSA,
+ V3AEUnitI, V3AEUnitI, V3AEUnitI,
+ V3AEUnitI, V3AEUnitI, V3AEUnitI,
+ V3AEUnitI, V3AEUnitI, V3AEUnitI,
+ V3AEUnitI, V3AEUnitI, V3AEUnitI,
+ V3AEUnitI, V3AEUnitI, V3AEUnitI,
+ V3AEUnitI, V3AEUnitI, V3AEUnitI,
+ V3AEUnitV, V3AEUnitV, V3AEUnitV,
+ V3AEUnitV, V3AEUnitV, V3AEUnitV,
+ V3AEUnitV, V3AEUnitV, V3AEUnitV,
+ V3AEUnitV, V3AEUnitV, V3AEUnitV,
+ V3AEUnitV, V3AEUnitV, V3AEUnitV,
+ V3AEUnitV, V3AEUnitV,
+ V3AEUnitV]> {
+ let Latency = 11;
+ let NumMicroOps = 54;
+}
+
+//===----------------------------------------------------------------------===//
+// Define predicate-controlled types
+
+// Each SchedWriteVariant below lists a predicate-guarded SchedVar first and a
+// NoSchedPred fallback last.
+
+// IsCheapLSL selects the 1c_1I write; the fallback is the 2c_1M write.
+def V3AEWrite_ArithI : SchedWriteVariant<[
+ SchedVar<IsCheapLSL, [V3AEWrite_1c_1I]>,
+ SchedVar<NoSchedPred, [V3AEWrite_2c_1M]>]>;
+
+// Flag-setting counterpart of V3AEWrite_ArithI: same IsCheapLSL predicate, but
+// both alternatives are the *_1Flg writes.
+def V3AEWrite_ArithF : SchedWriteVariant<[
+ SchedVar<IsCheapLSL, [V3AEWrite_1c_1F_1Flg]>,
+ SchedVar<NoSchedPred, [V3AEWrite_2c_1M_1Flg]>]>;
+
+// Same two writes as V3AEWrite_ArithF, but guarded by NeoverseNoLSL instead of
+// IsCheapLSL.
+def V3AEWrite_Logical : SchedWriteVariant<[
+ SchedVar<NeoverseNoLSL, [V3AEWrite_1c_1F_1Flg]>,
+ SchedVar<NoSchedPred, [V3AEWrite_2c_1M_1Flg]>]>;
+
+// IsRORImmIdiomPred selects the 1c_1I write; the fallback is 3c_1I_1M.
+def V3AEWrite_Extr : SchedWriteVariant<[
+ SchedVar<IsRORImmIdiomPred, [V3AEWrite_1c_1I]>,
+ SchedVar<NoSchedPred, [V3AEWrite_3c_1I_1M]>]>;
+
+// NeoverseHQForm selects the 7c write that adds an I unit; the fallback is the
+// 6c write on L alone.
+def V3AEWrite_LdrHQ : SchedWriteVariant<[
+ SchedVar<NeoverseHQForm, [V3AEWrite_7c_1I_1L]>,
+ SchedVar<NoSchedPred, [V3AEWrite_6c_1L]>]>;
+
+// NeoverseHQForm selects the write that adds an I unit; both alternatives are
+// 2c writes.
+def V3AEWrite_StrHQ : SchedWriteVariant<[
+ SchedVar<NeoverseHQForm, [V3AEWrite_2c_1SA_1V_1I]>,
+ SchedVar<NoSchedPred, [V3AEWrite_2c_1SA_1V]>]>;
+
+// Zero-move variants: when NeoverseZeroMove holds, each one resolves to
+// V3AEWrite_0c; otherwise it resolves to the write named in the record's
+// suffix (1c_1I, 2c_1V or 3c_1M0).
+def V3AEWrite_0or1c_1I : SchedWriteVariant<[
+ SchedVar<NeoverseZeroMove, [V3AEWrite_0c]>,
+ SchedVar<NoSchedPred, [V3AEWrite_1c_1I]>]>;
+
+def V3AEWrite_0or2c_1V : SchedWriteVariant<[
+ SchedVar<NeoverseZeroMove, [V3AEWrite_0c]>,
+ SchedVar<NoSchedPred, [V3AEWrite_2c_1V]>]>;
+
+def V3AEWrite_0or3c_1M0 : SchedWriteVariant<[
+ SchedVar<NeoverseZeroMove, [V3AEWrite_0c]>,
+ SchedVar<NoSchedPred, [V3AEWrite_3c_1M0]>]>;
+
+// NeoversePdIsPg variants: in every record below the predicate-guarded
+// alternative is the write with the higher latency in its name, and the
+// NoSchedPred fallback is the same resource shape one cycle shorter.
+def V3AEWrite_2or3c_1M : SchedWriteVariant<[
+ SchedVar<NeoversePdIsPg, [V3AEWrite_3c_1M]>,
+ SchedVar<NoSchedPred, [V3AEWrite_2c_1M]>]>;
+
+def V3AEWrite_1or2c_1M : SchedWriteVariant<[
+ SchedVar<NeoversePdIsPg, [V3AEWrite_2c_1M]>,
+ SchedVar<NoSchedPred, [V3AEWrite_1c_1M]>]>;
+
+def V3AEWrite_3or4c_1M0_1M : SchedWriteVariant<[
+ SchedVar<NeoversePdIsPg, [V3AEWrite_4c_1M0_1M]>,
+ SchedVar<NoSchedPred, [V3AEWrite_3c_1M0_1M]>]>;
+
+def V3AEWrite_2or3c_1V0 : SchedWriteVariant<[
+ SchedVar<NeoversePdIsPg, [V3AEWrite_3c_1V0]>,
+ SchedVar<NoSchedPred, [V3AEWrite_2c_1V0]>]>;
+
+def V3AEWrite_2or3c_1V0_1M : SchedWriteVariant<[
+ SchedVar<NeoversePdIsPg, [V3AEWrite_3c_1V0_1M]>,
+ SchedVar<NoSchedPred, [V3AEWrite_2c_1V0_1M]>]>;
+
+// NeoverseCheapIncDec selects the 1c_1I write; the fallback is the 2c_1M
+// write.
+def V3AEWrite_IncDec : SchedWriteVariant<[
+ SchedVar<NeoverseCheapIncDec, [V3AEWrite_1c_1I]>,
+ SchedVar<NoSchedPred, [V3AEWrite_2c_1M]>]>;
+
+//===----------------------------------------------------------------------===//
+// Define forwarded types
+
+// NOTE: SOG, p. 16, n. 2: Accumulator forwarding is not supported for
+// consumers of 64 bit multiply high operations?
+// Latency-2 write on V3AEUnitM. No SchedReadAdvance is defined alongside it
+// in this section, unlike the Wr/Rd pairs that follow.
+def V3AEWr_IM : SchedWriteRes<[V3AEUnitM]> { let Latency = 2; }
+
+// Latency-4 write on V3AEUnitV, paired with a read advance of 2 that is valid
+// for values produced by either the generic WriteFMul or V3AEWr_FMA itself.
+def V3AEWr_FMA : SchedWriteRes<[V3AEUnitV]> { let Latency = 4; }
+def V3AERd_FMA : SchedReadAdvance<2, [WriteFMul, V3AEWr_FMA]>;
+
+// Wr/Rd pairs: each V3AEWr_* gives the unit(s) and latency of the producer and
+// the matching V3AERd_* is a SchedReadAdvance<N, [...]> whose write list names
+// the producers the advance applies to (normally just its own Wr record).
+def V3AEWr_VA : SchedWriteRes<[V3AEUnitV]> { let Latency = 4; }
+def V3AERd_VA : SchedReadAdvance<3, [V3AEWr_VA]>;
+
+def V3AEWr_VDOT : SchedWriteRes<[V3AEUnitV]> { let Latency = 3; }
+def V3AERd_VDOT : SchedReadAdvance<2, [V3AEWr_VDOT]>;
+
+def V3AEWr_VMMA : SchedWriteRes<[V3AEUnitV]> { let Latency = 3; }
+def V3AERd_VMMA : SchedReadAdvance<2, [V3AEWr_VMMA]>;
+
+def V3AEWr_VMA : SchedWriteRes<[V3AEUnitV0]> { let Latency = 4; }
+def V3AERd_VMA : SchedReadAdvance<3, [V3AEWr_VMA]>;
+
+// V3AEUnitV0 is listed twice here, unlike the single-unit VMA/VMAL writes.
+def V3AEWr_VMAH : SchedWriteRes<[V3AEUnitV0, V3AEUnitV0]> { let Latency = 4; }
+def V3AERd_VMAH : SchedReadAdvance<2, [V3AEWr_VMAH]>;
+
+def V3AEWr_VMAL : SchedWriteRes<[V3AEUnitV0]> { let Latency = 4; }
+def V3AERd_VMAL : SchedReadAdvance<3, [V3AEWr_VMAL]>;
+
+def V3AEWr_VPA : SchedWriteRes<[V3AEUnitV]> { let Latency = 4; }
+def V3AERd_VPA : SchedReadAdvance<3, [V3AEWr_VPA]>;
+
+def V3AEWr_VSA : SchedWriteRes<[V3AEUnitV]> { let Latency = 4; }
+def V3AERd_VSA : SchedReadAdvance<3, [V3AEWr_VSA]>;
+
+def V3AEWr_VFCMA : SchedWriteRes<[V3AEUnitV]> { let Latency = 4; }
+def V3AERd_VFCMA : SchedReadAdvance<2, [V3AEWr_VFCMA]>;
+
+// V3AEWr_VFM has no read record of its own; it appears only as an extra
+// producer in V3AERd_VFMA's write list.
+def V3AEWr_VFM : SchedWriteRes<[V3AEUnitV]> { let Latency = 3; }
+def V3AEWr_VFMA : SchedWriteRes<[V3AEUnitV]> { let Latency = 4; }
+def V3AERd_VFMA : SchedReadAdvance<2, [V3AEWr_VFM, V3AEWr_VFMA]>;
+
+def V3AEWr_VFMAL : SchedWriteRes<[V3AEUnitV]> { let Latency = 4; }
+def V3AERd_VFMAL : SchedReadAdvance<2, [V3AEWr_VFMAL]>;
+
+// BF* Wr/Rd pairs, all on V3AEUnitV: latencies 5/6/5 with read advances of
+// 2/2/3 respectively.
+def V3AEWr_VBFDOT : SchedWriteRes<[V3AEUnitV]> { let Latency = 5; }
+def V3AERd_VBFDOT : SchedReadAdvance<2, [V3AEWr_VBFDOT]>;
+def V3AEWr_VBFMMA : SchedWriteRes<[V3AEUnitV]> { let Latency = 6; }
+def V3AERd_VBFMMA : SchedReadAdvance<2, [V3AEWr_VBFMMA]>;
+def V3AEWr_VBFMAL : SchedWriteRes<[V3AEUnitV]> { let Latency = 5; }
+def V3AERd_VBFMAL : SchedReadAdvance<3, [V3AEWr_VBFMAL]>;
+
+// Latency-2 write on V3AEUnitM0 with a read advance of 1 for its own results.
+def V3AEWr_CRC : SchedWriteRes<[V3AEUnitM0]> { let Latency = 2; }
+def V3AERd_CRC : SchedReadAdvance<1, [V3AEWr_CRC]>;
+
+// Z* Wr/Rd pairs. ZA and ZPA use V3AEUnitV while ZSA uses V3AEUnitV1; all
+// three have latency 4 and a read advance of 3.
+def V3AEWr_ZA : SchedWriteRes<[V3AEUnitV]> { let Latency = 4; }
+def V3AERd_ZA : SchedReadAdvance<3, [V3AEWr_ZA]>;
+def V3AEWr_ZPA : SchedWriteRes<[V3AEUnitV]> { let Latency = 4; }
+def V3AERd_ZPA : SchedReadAdvance<3, [V3AEWr_ZPA]>;
+def V3AEWr_ZSA : SchedWriteRes<[V3AEUnitV1]> { let Latency = 4; }
+def V3AERd_ZSA : SchedReadAdvance<3, [V3AEWr_ZSA]>;
+
+// The B and H dot-product writes differ only in unit: V3AEUnitV vs V3AEUnitV0.
+def V3AEWr_ZDOTB : SchedWriteRes<[V3AEUnitV]> { let Latency = 3; }
+def V3AERd_ZDOTB : SchedReadAdvance<2, [V3AEWr_ZDOTB]>;
+def V3AEWr_ZDOTH : SchedWriteRes<[V3AEUnitV0]> { let Latency = 3; }
+def V3AERd_ZDOTH : SchedReadAdvance<2, [V3AEWr_ZDOTH]>;
+
+// NOTE: SOG p. 43: Complex multiply-add B, H, S element size: How to reduce
+// throughput to 1 in case of forwarding?
+// In the pairs below the *BHS writes use one V3AEUnitV0 at latency 4 with a
+// read advance of 3, while the *D writes list V3AEUnitV0 twice at latency 5
+// with a read advance of 2.
+def V3AEWr_ZCMABHS : SchedWriteRes<[V3AEUnitV0]> { let Latency = 4; }
+def V3AERd_ZCMABHS : SchedReadAdvance<3, [V3AEWr_ZCMABHS]>;
+def V3AEWr_ZCMAD : SchedWriteRes<[V3AEUnitV0, V3AEUnitV0]> { let Latency = 5; }
+def V3AERd_ZCMAD : SchedReadAdvance<2, [V3AEWr_ZCMAD]>;
+
+def V3AEWr_ZMMA : SchedWriteRes<[V3AEUnitV]> { let Latency = 3; }
+def V3AERd_ZMMA : SchedReadAdvance<2, [V3AEWr_ZMMA]>;
+
+def V3AEWr_ZMABHS : SchedWriteRes<[V3AEUnitV0]> { let Latency = 4; }
+def V3AERd_ZMABHS : SchedReadAdvance<3, [V3AEWr_ZMABHS]>;
+def V3AEWr_ZMAD : SchedWriteRes<[V3AEUnitV0, V3AEUnitV0]> { let Latency = 5; }
+def V3AERd_ZMAD : SchedReadAdvance<2, [V3AEWr_ZMAD]>;
+
+def V3AEWr_ZMAL : SchedWriteRes<[V3AEUnitV0]> { let Latency = 4; }
+def V3AERd_ZMAL : SchedReadAdvance<3, [V3AEWr_ZMAL]>;
+
+// A single read record, V3AERd_ZMASQ, serves all three ZMASQ* writes with a
+// common advance of 2.
+def V3AEWr_ZMASQL : SchedWriteRes<[V3AEUnitV0]> { let Latency = 4; }
+def V3AEWr_ZMASQBHS : SchedWriteRes<[V3AEUnitV0]> { let Latency = 4; }
+def V3AEWr_ZMASQD : SchedWriteRes<[V3AEUnitV0, V3AEUnitV0]> { let Latency = 5; }
+def V3AERd_ZMASQ : SchedReadAdvance<2, [V3AEWr_ZMASQL, V3AEWr_ZMASQBHS,
+ V3AEWr_ZMASQD]>;
+
+// Z-prefixed FP and BF Wr/Rd pairs, all on V3AEUnitV. The ZBF* latencies and
+// read advances are numerically identical to the VBF* records defined
+// earlier in this section.
+def V3AEWr_ZFCMA : SchedWriteRes<[V3AEUnitV]> { let Latency = 5; }
+def V3AERd_ZFCMA : SchedReadAdvance<3, [V3AEWr_ZFCMA]>;
+
+def V3AEWr_ZFMA : SchedWriteRes<[V3AEUnitV]> { let Latency = 4; }
+def V3AERd_ZFMA : SchedReadAdvance<2, [V3AEWr_ZFMA]>;
+
+def V3AEWr_ZFMAL : SchedWriteRes<[V3AEUnitV]> { let Latency = 4; }
+def V3AERd_ZFMAL : SchedReadAdvance<2, [V3AEWr_ZFMAL]>;
+
+def V3AEWr_ZBFDOT : SchedWriteRes<[V3AEUnitV]> { let Latency = 5; }
+def V3AERd_ZBFDOT : SchedReadAdvance<2, [V3AEWr_ZBFDOT]>;
+def V3AEWr_ZBFMMA : SchedWriteRes<[V3AEUnitV]> { let Latency = 6; }
+def V3AERd_ZBFMMA : SchedReadAdvance<2, [V3AEWr_ZBFMMA]>;
+def V3AEWr_ZBFMAL : SchedWriteRes<[V3AEUnitV]> { let Latency = 5; }
+def V3AERd_ZBFMAL : SchedReadAdvance<3, [V3AEWr_ZBFMAL]>;
+
+//===----------------------------------------------------------------------===//
+// Define types with long resource cycles (rc)
+
+// Single-unit V3AEUnitV1 writes that set ReleaseAtCycles explicitly. The
+// record name encodes both values: <Latency>c_1V1_<ReleaseAtCycles>rc.
+def V3AEWrite_6c_1V1_5rc : SchedWriteRes<[V3AEUnitV1]> { let Latency = 6; let ReleaseAtCycles = [ 5]; }
+def V3AEWrite_9c_1V1_2rc : SchedWriteRes<[V3AEUnitV1]> { let Latency = 9; let ReleaseAtCycles = [ 2]; }
+def V3AEWrite_9c_1V1_4rc : SchedWriteRes<[V3AEUnitV1]> { let Latency = 9; let ReleaseAtCycles = [ 4]; }
+def V3AEWrite_10c_1V1_9rc : SchedWriteRes<[V3AEUnitV1]> { let Latency = 10; let ReleaseAtCycles = [ 9]; }
+def V3AEWrite_11c_1V1_4rc : SchedWriteRes<[V3AEUnitV1]> { let Latency = 11; let ReleaseAtCycles = [ 4]; }
+def V3AEWrite_13c_1V1_8rc : SchedWriteRes<[V3AEUnitV1]> { let Latency = 13; let ReleaseAtCycles = [8]; }
+def V3AEWrite_14c_1V1_2rc : SchedWriteRes<[V3AEUnitV1]> { let Latency = 14; let ReleaseAtCycles = [2]; }
+
+// Miscellaneous
+// -----------------------------------------------------------------------------
+
+// COPY is given the generic WriteI write, which this file aliases to
+// V3AEWrite_1c_1I in the arithmetic section.
+def : InstRW<[WriteI], (instrs COPY)>;
+
+// §3.3 Branch instructions
+// -----------------------------------------------------------------------------
+
+// Branch, immed
+// Compare and branch
+// WriteBr and WriteBrReg are both aliased to the same V3AEWrite_1c_1B write.
+def : SchedAlias<WriteBr, V3AEWrite_1c_1B>;
+
+// Branch, register
+def : SchedAlias<WriteBrReg, V3AEWrite_1c_1B>;
+
+// Branch and link, immed
+// Branch and link, register
+// BL and BLR are named explicitly and use the write that adds an S unit.
+def : InstRW<[V3AEWrite_1c_1B_1S], (instrs BL, BLR)>;
+
+// §3.4 Arithmetic and Logical Instructions
+// -----------------------------------------------------------------------------
+
+// ALU, basic
+def : SchedAlias<WriteI, V3AEWrite_1c_1I>;
+
+// ALU, basic, flagset
+def : InstRW<[V3AEWrite_1c_1F_1Flg],
+ (instregex "^(ADD|SUB)S[WX]r[ir]$",
+ "^(ADC|SBC)S[WX]r$",
+ "^ANDS[WX]ri$",
+ "^(AND|BIC)S[WX]rr$")>;
+// MOVZ takes the NeoverseZeroMove-guarded variant V3AEWrite_0or1c_1I rather
+// than the flagset write used by the patterns above.
+def : InstRW<[V3AEWrite_0or1c_1I], (instregex "^MOVZ[WX]i$")>;
+
+// ALU, extend and shift
+def : SchedAlias<WriteIEReg, V3AEWrite_2c_1M>;
+
+// Arithmetic, LSL shift, shift <= 4
+// Arithmetic, flagset, LSL shift, shift <= 4
+// Arithmetic, LSR/ASR/ROR shift or LSL shift > 4
+// All three rows are covered by two variants that branch on IsCheapLSL:
+// V3AEWrite_ArithI for WriteISReg and V3AEWrite_ArithF for the flag-setting
+// shifted-register ADDS/SUBS forms.
+def : SchedAlias<WriteISReg, V3AEWrite_ArithI>;
+def : InstRW<[V3AEWrite_ArithF],
+ (instregex "^(ADD|SUB)S[WX]rs$")>;
+
+// Arithmetic, immediate to logical address tag
+def : InstRW<[V3AEWrite_2c_1M], (instrs ADDG, SUBG)>;
+
+// Conditional compare
+def : InstRW<[V3AEWrite_1c_1F_1Flg], (instregex "^CCM[NP][WX][ir]")>;
+
+// Convert floating-point condition flags
+// Flag manipulation instructions
+// WriteSys is defined with an empty resource list and latency 1.
+def : WriteRes<WriteSys, []> { let Latency = 1; }
+
+// Insert Random Tags
+def : InstRW<[V3AEWrite_2c_1M], (instrs IRG, IRGstack)>;
+
+// Insert Tag Mask
+// Subtract Pointer
+def : InstRW<[V3AEWrite_1c_1I], (instrs GMI, SUBP)>;
+
+// Subtract Pointer, flagset
+def : InstRW<[V3AEWrite_1c_1F_1Flg], (instrs SUBPS)>;
+
+// Logical, shift, no flagset
+// ORR is split out from the other logical ops so it can use the
+// NeoverseZeroMove-guarded variant V3AEWrite_0or1c_1I.
+def : InstRW<[V3AEWrite_1c_1I], (instregex "^(AND|BIC|EON|EOR|ORN)[WX]rs$")>;
+def : InstRW<[V3AEWrite_0or1c_1I], (instregex "^ORR[WX]rs$")>;
+
+// Logical, shift, flagset
+// V3AEWrite_Logical branches on NeoverseNoLSL.
+def : InstRW<[V3AEWrite_Logical], (instregex "^(AND|BIC)S[WX]rs$")>;
+
+// Move and shift instructions
+// -----------------------------------------------------------------------------
+
+// WriteImm shares V3AEWrite_1c_1I with the basic ALU alias (WriteI).
+def : SchedAlias<WriteImm, V3AEWrite_1c_1I>;
+
+// §3.5 Divide and multiply instructions
+// -----------------------------------------------------------------------------
+
+// SDIV, UDIV
+// 32-bit and 64-bit divides both use V3AEUnitM0 writes (12c and 20c).
+def : SchedAlias<WriteID32, V3AEWrite_12c_1M0>;
+def : SchedAlias<WriteID64, V3AEWrite_20c_1M0>;
+
+def : SchedAlias<WriteIM32, V3AEWrite_2c_1M>;
+def : SchedAlias<WriteIM64, V3AEWrite_2c_1M>;
+
+// Multiply
+// Multiply accumulate, W-form
+// Multiply accumulate, X-form
+// NOTE(review): V3AEWr_IM is declared among the forwarded types, but no
+// read-advance entry accompanies it in this list or the next one. Confirm
+// that accumulator forwarding is intentionally not modelled here.
+def : InstRW<[V3AEWr_IM], (instregex "^M(ADD|SUB)[WX]rrr$")>;
+
+// Multiply accumulate long
+// Multiply long
+def : InstRW<[V3AEWr_IM], (instregex "^(S|U)M(ADD|SUB)Lrrr$")>;
+
+// Multiply high
+def : InstRW<[V3AEWrite_3c_1M], (instrs SMULHrr, UMULHrr)>;
+
+// §3.6 Pointer Authentication Instructions (v8.3 PAC)
+// -----------------------------------------------------------------------------
+
+// Authenticate data address
+// Authenticate instruction address
+// Compute pointer authentication code for data address
+// Compute pointer authentication code, using generic key
+// Compute pointer authentication code for instruction address
+// The two patterns are bare prefixes, so they cover every opcode whose name
+// begins with AUT or PAC.
+def : InstRW<[V3AEWrite_4c_1M0], (instregex "^AUT", "^PAC")>;
+
+// Branch and link, register, with pointer authentication
+// Branch, register, with pointer authentication
+// Branch, return, with pointer authentication
+def : InstRW<[V3AEWrite_6c_1M0_1B], (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ, BRAA,
+ BRAAZ, BRAB, BRABZ, RETAA, RETAB,
+ ERETAA, ERETAB)>;
+
+
+// Load register, with pointer authentication
+// The pattern has no trailing '$', so it matches any opcode starting with
+// LDRAA/LDRAB followed by "indexed" or "writeback".
+def : InstRW<[V3AEWrite_9c_1M0_1L], (instregex "^LDRA[AB](indexed|writeback)")>;
+
+// Strip pointer authentication code
+def : InstRW<[V3AEWrite_2c_1M0], (instrs XPACD, XPACI, XPACLRI)>;
+
+// §3.7 Miscellaneous data-processing instructions
+// -----------------------------------------------------------------------------
+
+// Address generation
+def : InstRW<[V3AEWrite_1c_1I], (instrs ADR, ADRP)>;
+
+// Bitfield extract, one reg
+// Bitfield extract, two regs
+// V3AEWrite_Extr (which branches on IsRORImmIdiomPred) is attached twice:
+// through the generic WriteExtr alias and explicitly to EXTRWrri/EXTRXrri.
+def : SchedAlias<WriteExtr, V3AEWrite_Extr>;
+def : InstRW<[V3AEWrite_Extr], (instrs EXTRWrri, EXTRXrri)>;
+
+// Bitfield move, basic
+def : SchedAlias<WriteIS, V3AEWrite_1c_1I>;
+
+// Bitfield move, insert
+def : InstRW<[V3AEWrite_2c_1M], (instregex "^BFM[WX]ri$")>;
+
+// §3.8 Load instructions
+// -----------------------------------------------------------------------------
+
+// NOTE: SOG p. 19: Throughput of LDN?P X-form should be 2, but reported as 3.
+
+// WriteLD and WriteLDIdx share the same V3AEWrite_4c_1L write.
+def : SchedAlias<WriteLD, V3AEWrite_4c_1L>;
+def : SchedAlias<WriteLDIdx, V3AEWrite_4c_1L>;
+
+// Load register, literal
+def : InstRW<[V3AEWrite_5c_1L_1I], (instrs LDRWl, LDRXl, LDRSWl, PRFMl)>;
+
+// Load pair, signed immed offset, signed words
+// The list carries two writes: the model-specific one plus the generic
+// WriteLDHi (presumably for the second register of the pair).
+def : InstRW<[V3AEWrite_5c_1I_3L, WriteLDHi], (instrs LDPSWi)>;
+
+// Load pair, immed post-index or immed pre-index, signed words
+// Same writes as LDPSWi with WriteAdr placed first.
+def : InstRW<[WriteAdr, V3AEWrite_5c_1I_3L, WriteLDHi],
+ (instregex "^LDPSW(post|pre)$")>;
+
+// §3.9 Store instructions
+// -----------------------------------------------------------------------------
+
+// NOTE: SOG, p. 20: Unsure if STRH uses pipeline I.
+
+// WriteST, WriteSTIdx and WriteSTP all map to V3AEWrite_1c_1SA_1D; WriteAdr
+// maps to the basic V3AEWrite_1c_1I write.
+def : SchedAlias<WriteST, V3AEWrite_1c_1SA_1D>;
+def : SchedAlias<WriteSTIdx, V3AEWrite_1c_1SA_1D>;
+def : SchedAlias<WriteSTP, V3AEWrite_1c_1SA_1D>;
+def : SchedAlias<WriteAdr, V3AEWrite_1c_1I>;
+
+// §3.10 Tag load instructions
+// -----------------------------------------------------------------------------
+
+// Load allocation tag
+// Load multiple allocation tags
+// Uses the same V3AEWrite_4c_1L write that WriteLD is aliased to.
+def : InstRW<[V3AEWrite_4c_1L], (instrs LDG, LDGM)>;
+
+// §3.11 Tag store instructions
+// -----------------------------------------------------------------------------
+
+// Store allocation tags to one or two granules, post-index
+// Store allocation tags to one or two granules, pre-index
+// Store allocation tag to one or two granules, zeroing, post-index
+// Store Allocation Tag to one or two granules, zeroing, pre-index
+// Store allocation tag and reg pair to memory, post-Index
+// Store allocation tag and reg pair to memory, pre-Index
+// The pre/post-index forms use the write that adds an I unit (_1I suffix)
+// on top of the SA+D write used by the signed-offset forms below.
+def : InstRW<[V3AEWrite_1c_1SA_1D_1I], (instrs STGPreIndex, STGPostIndex,
+ ST2GPreIndex, ST2GPostIndex,
+ STZGPreIndex, STZGPostIndex,
+ STZ2GPreIndex, STZ2GPostIndex,
+ STGPpre, STGPpost)>;
+
+// Store allocation tags to one or two granules, signed offset
+// Store allocation tag to two granules, zeroing, signed offset
+// Store allocation tag and reg pair to memory, signed offset
+// Store multiple allocation tags
+def : InstRW<[V3AEWrite_1c_1SA_1D], (instrs STGi, ST2Gi, STZGi,
+ STZ2Gi, STGPi, STGM, STZGM)>;
+
+// §3.12 FP data processing instructions
+// -----------------------------------------------------------------------------
+
+// FP absolute value
+// FP arithmetic
+// FP min/max
+// FP negate
+// FP select
+def : SchedAlias<WriteF, V3AEWrite_2c_1V>;
+
+// FP compare
+def : SchedAlias<WriteFCmp, V3AEWrite_2c_1V0>;
+
+// FP divide, square root
+// WriteFDiv is aliased to the 6c write; the per-form InstRW entries below
+// additionally name FDIV/FSQRT H, S and D forms with 6c, 8c and 13c writes.
+def : SchedAlias<WriteFDiv, V3AEWrite_6c_1V1>;
+
+// FP divide, H-form
+def : InstRW<[V3AEWrite_6c_1V1], (instrs FDIVHrr)>;
+// FP divide, S-form
+def : InstRW<[V3AEWrite_8c_1V1], (instrs FDIVSrr)>;
+// FP divide, D-form
+def : InstRW<[V3AEWrite_13c_1V1], (instrs FDIVDrr)>;
+
+// FP square root, H-form
+def : InstRW<[V3AEWrite_6c_1V1], (instrs FSQRTHr)>;
+// FP square root, S-form
+def : InstRW<[V3AEWrite_8c_1V1], (instrs FSQRTSr)>;
+// FP square root, D-form
+def : InstRW<[V3AEWrite_13c_1V1], (instrs FSQRTDr)>;
+
+// FP multiply
+// WriteFMul is given resources directly via WriteRes (V3AEUnitV, latency 3)
+// rather than through a SchedAlias; V3AERd_FMA lists it as a valid producer.
+def : WriteRes<WriteFMul, [V3AEUnitV]> { let Latency = 3; }
+
+// FP multiply accumulate
+// V3AERd_FMA follows two ReadDefault entries, so the read advance sits in the
+// third read slot (presumably the accumulator operand).
+def : InstRW<[V3AEWr_FMA, ReadDefault, ReadDefault, V3AERd_FMA],
+ (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>;
+
+// FP round to integral
+def : InstRW<[V3AEWrite_3c_1V0], (instregex "^FRINT[AIMNPXZ][HSD]r$",
+ "^FRINT(32|64)[XZ][SD]r$")>;
+
+// §3.13 FP miscellaneous instructions
+// -----------------------------------------------------------------------------
+
+// FP convert, from gen to vec reg
+def : InstRW<[V3AEWrite_3c_1M0], (instregex "^[SU]CVTF[SU][WX][HSD]ri$")>;
+
+// FP convert, from vec to gen reg
+def : InstRW<[V3AEWrite_3c_1V0],
+ (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]ri?$")>;
+
+// FP convert, Javascript from vec to gen reg
+// NOTE(review): this is an alias of the generic WriteFCvt, so it applies to
+// every instruction using WriteFCvt that has no InstRW entry of its own, not
+// only to the Javascript convert named above. Confirm that is the intent.
+def : SchedAlias<WriteFCvt, V3AEWrite_3c_1V0>;
+
+// FP convert, from vec to vec reg
+def : InstRW<[V3AEWrite_3c_1V], (instrs FCVTSHr, FCVTDHr, FCVTHSr, FCVTDSr,
+ FCVTHDr, FCVTSDr, FCVTXNv1i64)>;
+
+// FP move, immed
+// FP move, register
+def : SchedAlias<WriteFImm, V3AEWrite_2c_1V>;
+
+// FP transfer, from gen to low half of vec reg
+// Uses the NeoverseZeroMove-guarded variant (0c or 3c on M0).
+def : InstRW<[V3AEWrite_0or3c_1M0],
+ (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;
+
+// FP transfer, from gen to high half of vec reg
+def : InstRW<[V3AEWrite_5c_1M0_1V], (instrs FMOVXDHighr)>;
+
+// FP transfer, from vec to gen reg
+def : SchedAlias<WriteFCopy, V3AEWrite_2c_2V>;
+
+// §3.14 FP load instructions
+// -----------------------------------------------------------------------------
+
+// Load vector reg, literal, S/D/Q forms
+def : InstRW<[V3AEWrite_7c_1I_1L], (instregex "^LDR[SDQ]l$")>;
+
+// Load vector reg, unscaled immed
+def : InstRW<[V3AEWrite_6c_1L], (instregex "^LDUR[BHSDQ]i$")>;
+
+// Load vector reg, immed post-index
+// Load vector reg, immed pre-index
+// Writeback forms list WriteAdr first and use the write that adds an I unit.
+def : InstRW<[WriteAdr, V3AEWrite_6c_1I_1L],
+ (instregex "^LDR[BHSDQ](pre|post)$")>;
+
+// Load vector reg, unsigned immed
+def : InstRW<[V3AEWrite_6c_1L], (instregex "^LDR[BHSDQ]ui$")>;
+
+// Load vector reg, register offset, basic
+// Load vector reg, register offset, scale, S/D-form
+// Load vector reg, register offset, scale, H/Q-form
+// Load vector reg, register offset, extend
+// Load vector reg, register offset, extend, scale, S/D-form
+// Load vector reg, register offset, extend, scale, H/Q-form
+// All six rows share one entry; V3AEWrite_LdrHQ picks between the 7c (I+L)
+// and 6c (L) writes using the NeoverseHQForm predicate.
+def : InstRW<[V3AEWrite_LdrHQ, ReadAdrBase], (instregex "^LDR[BHSDQ]ro[WX]$")>;
+
+// Load vector pair, immed offset, S/D-form
+def : InstRW<[V3AEWrite_6c_1L, WriteLDHi], (instregex "^LDN?P[SD]i$")>;
+
+// Load vector pair, immed offset, Q-form
+def : InstRW<[V3AEWrite_6c_2L, WriteLDHi], (instrs LDPQi, LDNPQi)>;
+
+// Load vector pair, immed post-index, S/D-form
+// Load vector pair, immed pre-index, S/D-form
+def : InstRW<[WriteAdr, V3AEWrite_6c_1I_1L, WriteLDHi],
+ (instregex "^LDP[SD](pre|post)$")>;
+
+// Load vector pair, immed post-index, Q-form
+// Load vector pair, immed pre-index, Q-form
+def : InstRW<[WriteAdr, V3AEWrite_6c_2I_2L, WriteLDHi], (instrs LDPQpost,
+ LDPQpre)>;
+
+// §3.15 FP store instructions
+// -----------------------------------------------------------------------------
+
+// Store vector reg, unscaled immed, B/H/S/D-form
+// Store vector reg, unscaled immed, Q-form
+def : InstRW<[V3AEWrite_2c_1SA_1V], (instregex "^STUR[BHSDQ]i$")>;
+
+// Store vector reg, immed post-index, B/H/S/D-form
+// Store vector reg, immed post-index, Q-form
+// Store vector reg, immed pre-index, B/H/S/D-form
+// Store vector reg, immed pre-index, Q-form
+def : InstRW<[WriteAdr, V3AEWrite_2c_1SA_1V_1I],
+ (instregex "^STR[BHSDQ](pre|post)$")>;
+
+// Store vector reg, unsigned immed, B/H/S/D-form
+// Store vector reg, unsigned immed, Q-form
+def : InstRW<[V3AEWrite_2c_1SA_1V], (instregex "^STR[BHSDQ]ui$")>;
+
+// Store vector reg, register offset, basic, B/H/S/D-form
+// Store vector reg, register offset, basic, Q-form
+// Store vector reg, register offset, scale, H-form
+// Store vector reg, register offset, scale, S/D-form
+// Store vector reg, register offset, scale, Q-form
+// Store vector reg, register offset, extend, B/H/S/D-form
+// Store vector reg, register offset, extend, Q-form
+// Store vector reg, register offset, extend, scale, H-form
+// Store vector reg, register offset, extend, scale, S/D-form
+// Store vector reg, register offset, extend, scale, Q-form
+// All ten rows share one entry; V3AEWrite_StrHQ adds an I unit when the
+// NeoverseHQForm predicate holds.
+def : InstRW<[V3AEWrite_StrHQ, ReadAdrBase],
+ (instregex "^STR[BHSDQ]ro[WX]$")>;
+
+// Store vector pair, immed offset, S-form
+// Store vector pair, immed offset, D-form
+def : InstRW<[V3AEWrite_2c_1SA_1V], (instregex "^STN?P[SD]i$")>;
+
+// Store vector pair, immed offset, Q-form
+def : InstRW<[V3AEWrite_2c_1SA_2V], (instrs STPQi, STNPQi)>;
+
+// Store vector pair, immed post-index, S-form
+// Store vector pair, immed post-index, D-form
+// Store vector pair, immed pre-index, S-form
+// Store vector pair, immed pre-index, D-form
+def : InstRW<[WriteAdr, V3AEWrite_2c_1SA_1V_1I],
+ (instregex "^STP[SD](pre|post)$")>;
+
+// Store vector pair, immed post-index, Q-form
+// NOTE(review): unlike the S/D pre/post entry above, the two Q-form entries
+// below do not list WriteAdr first. Confirm this is intentional.
+def : InstRW<[V3AEWrite_2c_1SA_2V_1I], (instrs STPQpost)>;
+
+// Store vector pair, immed pre-index, Q-form
+def : InstRW<[V3AEWrite_2c_1SA_2V_2I], (instrs STPQpre)>;
+
+// §3.16 ASIMD integer instructions
+// -----------------------------------------------------------------------------
+
+// ASIMD absolute diff
+// ASIMD absolute diff long
+// ASIMD arith, basic
+// ASIMD arith, complex
+// ASIMD arith, pair-wise
+// ASIMD compare
+// ASIMD logical
+// ASIMD max/min, basic and pair-wise
+// WriteVd and WriteVq share the same V3AEWrite_2c_1V write.
+def : SchedAlias<WriteVd, V3AEWrite_2c_1V>;
+def : SchedAlias<WriteVq, V3AEWrite_2c_1V>;
+
+// ASIMD absolute diff accum
+// ASIMD absolute diff accum long
+def : InstRW<[V3AEWr_VA, V3AERd_VA], (instregex "^[SU]ABAL?v")>;
+
+// ASIMD arith, reduce, 4H/4S
+def : InstRW<[V3AEWrite_3c_1V1], (instregex "^(ADDV|[SU]ADDLV)v4(i16|i32)v$")>;
+
+// ASIMD arith, reduce, 8B/8H
+def : InstRW<[V3AEWrite_5c_1V1_1V],
+ (instregex "^(ADDV|[SU]ADDLV)v8(i8|i16)v$")>;
+
+// ASIMD arith, reduce, 16B
+def : InstRW<[V3AEWrite_6c_2V1], (instregex "^(ADDV|[SU]ADDLV)v16i8v$")>;
+
+// ASIMD dot product
+// ASIMD dot product using signed and unsigned integers
+def : InstRW<[V3AEWr_VDOT, V3AERd_VDOT],
+ (instregex "^([SU]|SU|US)DOT(lane)?(v8|v16)i8$")>;
+
+// ASIMD matrix multiply-accumulate
+def : InstRW<[V3AEWr_VMMA, V3AERd_VMMA], (instrs SMMLA, UMMLA, USMMLA)>;
+
+// ASIMD max/min, reduce, 4H/4S
+// The max/min reductions use the same three writes as the ADDV-style
+// reductions above for the matching element counts.
+def : InstRW<[V3AEWrite_3c_1V1], (instregex "^[SU](MAX|MIN)Vv4i16v$",
+ "^[SU](MAX|MIN)Vv4i32v$")>;
+
+// ASIMD max/min, reduce, 8B/8H
+def : InstRW<[V3AEWrite_5c_1V1_1V], (instregex "^[SU](MAX|MIN)Vv8i8v$",
+ "^[SU](MAX|MIN)Vv8i16v$")>;
+
+// ASIMD max/min, reduce, 16B
+// NOTE(review): unlike the two max/min entries above, this pattern has no
+// explicit leading '^'. Confirm whether the omission is intentional.
+def : InstRW<[V3AEWrite_6c_2V1], (instregex "[SU](MAX|MIN)Vv16i8v$")>;
+
+// ASIMD multiply
+def : InstRW<[V3AEWrite_4c_1V0], (instregex "^MULv", "^SQ(R)?DMULHv")>;
+
+// ASIMD multiply accumulate
+// The three accumulate entries below each pair a forwarded write with its
+// read advance (VMA, VMAH, VMAL).
+def : InstRW<[V3AEWr_VMA, V3AERd_VMA], (instregex "^MLAv", "^MLSv")>;
+
+// ASIMD multiply accumulate high
+def : InstRW<[V3AEWr_VMAH, V3AERd_VMAH], (instregex "^SQRDMLAHv", "^SQRDMLSHv")>;
+
+// ASIMD multiply accumulate long
+def : InstRW<[V3AEWr_VMAL, V3AERd_VMAL], (instregex "^[SU]MLALv", "^[SU]MLSLv")>;
+
+// ASIMD multiply accumulate saturating long
+// Uses a plain write with no read advance, unlike the accumulate forms above.
+def : InstRW<[V3AEWrite_4c_1V0], (instregex "^SQDML[AS]L[iv]")>;
+
+// ASIMD multiply/multiply long (8x8) polynomial, D-form
+// ASIMD multiply/multiply long (8x8) polynomial, Q-form
+def : InstRW<[V3AEWrite_3c_1V], (instregex "^PMULL?(v8i8|v16i8)$")>;
+
+// ASIMD multiply long
+def : InstRW<[V3AEWrite_3c_1V0], (instregex "^[SU]MULLv", "^SQDMULL[iv]")>;
+
+// ASIMD pairwise add and accumulate long
+def : InstRW<[V3AEWr_VPA, V3AERd_VPA], (instregex "^[SU]ADALPv")>;
+
+// ASIMD shift accumulate
+def : InstRW<[V3AEWr_VSA, V3AERd_VSA], (instregex "^[SU]SRA[dv]", "^[SU]RSRA[dv]")>;
+
+// ASIMD shift by immed, basic
+def : InstRW<[V3AEWrite_2c_1V], (instregex "^SHL[dv]", "^SHLLv", "^SHRNv",
+ "^SSHLLv", "^SSHR[dv]", "^USHLLv",
+ "^USHR[dv]")>;
+
+// ASIMD shift by immed and insert, basic
+def : InstRW<[V3AEWrite_2c_1V], (instregex "^SLI[dv]", "^SRI[dv]")>;
+
+// ASIMD shift by immed, complex
+def : InstRW<[V3AEWrite_4c_1V],
+ (instregex "^RSHRNv", "^SQRSHRU?N[bhsv]", "^(SQSHLU?|UQSHL)[bhsd]$",
+ "^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$",
+ "^SQSHRU?N[bhsv]", "^SRSHR[dv]", "^UQRSHRN[bhsv]",
+ "^UQSHRN[bhsv]", "^URSHR[dv]")>;
+
+// ASIMD shift by register, basic
+def : InstRW<[V3AEWrite_2c_1V], (instregex "^[SU]SHLv")>;
+
+// ASIMD shift by register, complex
+// The last pattern enumerates the type suffixes explicitly and is anchored
+// with '$'.
+def : InstRW<[V3AEWrite_4c_1V],
+ (instregex "^[SU]RSHLv", "^[SU]QRSHLv",
+ "^[SU]QSHL(v1i8|v1i16|v1i32|v1i64|v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)$")>;
+
+// §3.17 ASIMD floating-point instructions
+// -----------------------------------------------------------------------------
+
+// ASIMD FP absolute value/difference
+// ASIMD FP arith, normal
+// ASIMD FP compare
+// ASIMD FP complex add
+// ASIMD FP max/min, normal
+// ASIMD FP max/min, pairwise
+// ASIMD FP negate
+// Handled by SchedAlias<WriteV[dq], ...>
+
+// ASIMD FP complex multiply add
+def : InstRW<[V3AEWr_VFCMA, V3AERd_VFCMA], (instregex "^FCMLAv")>;
+
+// ASIMD FP convert, long (F16 to F32)
+def : InstRW<[V3AEWrite_4c_2V0], (instregex "^FCVTL(v4|v8)i16")>;
+
+// ASIMD FP convert, long (F32 to F64)
+def : InstRW<[V3AEWrite_3c_1V0], (instregex "^FCVTL(v2|v4)i32")>;
+
+// ASIMD FP convert, narrow (F32 to F16)
+def : InstRW<[V3AEWrite_4c_2V0], (instregex "^FCVTN(v4|v8)i16")>;
+
+// ASIMD FP convert, narrow (F64 to F32)
+def : InstRW<[V3AEWrite_3c_1V0], (instregex "^FCVTN(v2|v4)i32",
+ "^FCVTXN(v2|v4)f32")>;
+
+// The three "convert, other" groups below step through the writes 3c_1V0,
+// 4c_2V0 and 6c_4V0; each group lists the FCVT* and [SU]CVTF patterns for its
+// vector, _shift and scalar forms.
+// ASIMD FP convert, other, D-form F32 and Q-form F64
+def : InstRW<[V3AEWrite_3c_1V0], (instregex "^FCVT[AMNPZ][SU]v2f(32|64)$",
+ "^FCVT[AMNPZ][SU]v2i(32|64)_shift$",
+ "^FCVT[AMNPZ][SU]v1i64$",
+ "^FCVTZ[SU]d$",
+ "^[SU]CVTFv2f(32|64)$",
+ "^[SU]CVTFv2i(32|64)_shift$",
+ "^[SU]CVTFv1i64$",
+ "^[SU]CVTFd$")>;
+
+// ASIMD FP convert, other, D-form F16 and Q-form F32
+def : InstRW<[V3AEWrite_4c_2V0], (instregex "^FCVT[AMNPZ][SU]v4f(16|32)$",
+ "^FCVT[AMNPZ][SU]v4i(16|32)_shift$",
+ "^FCVT[AMNPZ][SU]v1i32$",
+ "^FCVTZ[SU]s$",
+ "^[SU]CVTFv4f(16|32)$",
+ "^[SU]CVTFv4i(16|32)_shift$",
+ "^[SU]CVTFv1i32$",
+ "^[SU]CVTFs$")>;
+
+// ASIMD FP convert, other, Q-form F16
+def : InstRW<[V3AEWrite_6c_4V0], (instregex "^FCVT[AMNPZ][SU]v8f16$",
+ "^FCVT[AMNPZ][SU]v8i16_shift$",
+ "^FCVT[AMNPZ][SU]v1f16$",
+ "^FCVTZ[SU]h$",
+ "^[SU]CVTFv8f16$",
+ "^[SU]CVTFv8i16_shift$",
+ "^[SU]CVTFv1i16$",
+ "^[SU]CVTFh$")>;
+
+// The vector FDIV entries use the V3AEUnitV1 writes that set ReleaseAtCycles
+// explicitly (the *_Nrc records); the FSQRT entries further down reuse the
+// same write for each matching form.
+// ASIMD FP divide, D-form, F16
+def : InstRW<[V3AEWrite_9c_1V1_4rc], (instrs FDIVv4f16)>;
+
+// ASIMD FP divide, D-form, F32
+def : InstRW<[V3AEWrite_9c_1V1_2rc], (instrs FDIVv2f32)>;
+
+// ASIMD FP divide, Q-form, F16
+def : InstRW<[V3AEWrite_13c_1V1_8rc], (instrs FDIVv8f16)>;
+
+// ASIMD FP divide, Q-form, F32
+def : InstRW<[V3AEWrite_11c_1V1_4rc], (instrs FDIVv4f32)>;
+
+// ASIMD FP divide, Q-form, F64
+def : InstRW<[V3AEWrite_14c_1V1_2rc], (instrs FDIVv2f64)>;
+
+// ASIMD FP max/min, reduce, F32 and D-form F16
+def : InstRW<[V3AEWrite_4c_2V], (instregex "^(FMAX|FMIN)(NM)?Vv4(i16|i32)v$")>;
+
+// ASIMD FP max/min, reduce, Q-form F16
+def : InstRW<[V3AEWrite_6c_3V], (instregex "^(FMAX|FMIN)(NM)?Vv8i16v$")>;
+
+// ASIMD FP multiply
+// V3AEWr_VFM is listed as a valid producer in V3AERd_VFMA, which the
+// multiply-accumulate entry below reads through.
+def : InstRW<[V3AEWr_VFM], (instregex "^FMULv", "^FMULXv")>;
+
+// ASIMD FP multiply accumulate
+def : InstRW<[V3AEWr_VFMA, V3AERd_VFMA], (instregex "^FMLAv", "^FMLSv")>;
+
+// ASIMD FP multiply accumulate long
+def : InstRW<[V3AEWr_VFMAL, V3AERd_VFMAL], (instregex "^FML[AS]L2?(lane)?v")>;
+
+// ASIMD FP round, D-form F32 and Q-form F64
+def : InstRW<[V3AEWrite_3c_1V0],
+ (instregex "^FRINT[AIMNPXZ]v2f(32|64)$",
+ "^FRINT(32|64)[XZ]v2f(32|64)$")>;
+
+// ASIMD FP round, D-form F16 and Q-form F32
+def : InstRW<[V3AEWrite_4c_2V0],
+ (instregex "^FRINT[AIMNPXZ]v4f(16|32)$",
+ "^FRINT(32|64)[XZ]v4f32$")>;
+
+// ASIMD FP round, Q-form F16
+def : InstRW<[V3AEWrite_6c_4V0], (instregex "^FRINT[AIMNPXZ]v8f16$")>;
+
+// ASIMD FP square root, D-form, F16
+def : InstRW<[V3AEWrite_9c_1V1_4rc], (instrs FSQRTv4f16)>;
+
+// ASIMD FP square root, D-form, F32
+def : InstRW<[V3AEWrite_9c_1V1_2rc], (instrs FSQRTv2f32)>;
+
+// ASIMD FP square root, Q-form, F16
+def : InstRW<[V3AEWrite_13c_1V1_8rc], (instrs FSQRTv8f16)>;
+
+// ASIMD FP square root, Q-form, F32
+def : InstRW<[V3AEWrite_11c_1V1_4rc], (instrs FSQRTv4f32)>;
+
+// ASIMD FP square root, Q-form, F64
+def : InstRW<[V3AEWrite_14c_1V1_2rc], (instrs FSQRTv2f64)>;
+
+// §3.18 ASIMD BFloat16 (BF16) instructions
+// -----------------------------------------------------------------------------
+
+// ASIMD convert, F32 to BF16
+def : InstRW<[V3AEWrite_4c_2V0], (instrs BFCVTN, BFCVTN2)>;
+
+// ASIMD dot product
+// The dot-product, matrix and multiply-accumulate-long entries each pair a
+// VBF* forwarded write with its read advance.
+def : InstRW<[V3AEWr_VBFDOT, V3AERd_VBFDOT], (instrs BFDOTv4bf16, BFDOTv8bf16)>;
+
+// ASIMD matrix multiply accumulate
+def : InstRW<[V3AEWr_VBFMMA, V3AERd_VBFMMA], (instrs BFMMLA)>;
+
+// ASIMD multiply accumulate long
+def : InstRW<[V3AEWr_VBFMAL, V3AERd_VBFMAL], (instrs BFMLALB, BFMLALBIdx, BFMLALT,
+ BFMLALTIdx)>;
+
+// Scalar convert, F32 to BF16
+def : InstRW<[V3AEWrite_3c_1V0], (instrs BFCVT)>;
+
+// §3.19 ASIMD miscellaneous instructions
+// -----------------------------------------------------------------------------
+
+// ASIMD bit reverse
+// ASIMD bitwise insert
+// ASIMD count
+// ASIMD duplicate, element
+// ASIMD extract
+// ASIMD extract narrow
+// ASIMD insert, element to element
+// ASIMD move, FP immed
+// ASIMD move, integer immed
+// ASIMD reverse
+// ASIMD table lookup extension, 1 table reg
+// ASIMD transpose
+// ASIMD unzip/zip
+// Handled by SchedAlias<WriteV[dq], ...>
+// MOVID and MOVIv2d_ns are named explicitly so they can use the
+// NeoverseZeroMove-guarded variant V3AEWrite_0or2c_1V.
+def : InstRW<[V3AEWrite_0or2c_1V], (instrs MOVID, MOVIv2d_ns)>;
+
+// ASIMD duplicate, gen reg
+def : InstRW<[V3AEWrite_3c_1M0], (instregex "^DUPv.+gpr")>;
+
+// ASIMD extract narrow, saturating
+def : InstRW<[V3AEWrite_4c_1V], (instregex "^[SU]QXTNv", "^SQXTUNv")>;
+
+// The estimate groups below step through the writes 3c_1V0, 4c_2V0 and
+// 6c_4V0.
+// ASIMD reciprocal and square root estimate, D-form U32
+def : InstRW<[V3AEWrite_3c_1V0], (instrs URECPEv2i32, URSQRTEv2i32)>;
+
+// ASIMD reciprocal and square root estimate, Q-form U32
+def : InstRW<[V3AEWrite_4c_2V0], (instrs URECPEv4i32, URSQRTEv4i32)>;
+
+// ASIMD reciprocal and square root estimate, D-form F32 and scalar forms
+def : InstRW<[V3AEWrite_3c_1V0], (instrs FRECPEv1f16, FRECPEv1i32,
+ FRECPEv1i64, FRECPEv2f32,
+ FRSQRTEv1f16, FRSQRTEv1i32,
+ FRSQRTEv1i64, FRSQRTEv2f32)>;
+
+// ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32
+def : InstRW<[V3AEWrite_4c_2V0], (instrs FRECPEv4f16, FRECPEv4f32,
+ FRSQRTEv4f16, FRSQRTEv4f32)>;
+
+// ASIMD reciprocal and square root estimate, Q-form F16
+def : InstRW<[V3AEWrite_6c_4V0], (instrs FRECPEv8f16, FRSQRTEv8f16)>;
+
+// ASIMD reciprocal exponent
+def : InstRW<[V3AEWrite_3c_1V0], (instregex "^FRECPXv")>;
+
+// ASIMD reciprocal step
+def : InstRW<[V3AEWrite_4c_1V], (instregex "^FRECPS(32|64|v)",
+ "^FRSQRTS(32|64|v)")>;
+
+// ASIMD table lookup, 1 or 2 table regs
+def : InstRW<[V3AEWrite_2c_1V], (instrs TBLv8i8One, TBLv16i8One,
+ TBLv8i8Two, TBLv16i8Two)>;
+
+// ASIMD table lookup, 3 table regs
+def : InstRW<[V3AEWrite_4c_2V], (instrs TBLv8i8Three, TBLv16i8Three)>;
+
+// ASIMD table lookup, 4 table regs
+def : InstRW<[V3AEWrite_4c_3V], (instrs TBLv8i8Four, TBLv16i8Four)>;
+
+// TBX with one table register has no entry here; the section header lists it
+// among the forms handled by SchedAlias<WriteV[dq], ...>.
+// ASIMD table lookup extension, 2 table reg
+def : InstRW<[V3AEWrite_4c_2V], (instrs TBXv8i8Two, TBXv16i8Two)>;
+
+// ASIMD table lookup extension, 3 table reg
+def : InstRW<[V3AEWrite_6c_3V], (instrs TBXv8i8Three, TBXv16i8Three)>;
+
+// ASIMD table lookup extension, 4 table reg
+def : InstRW<[V3AEWrite_6c_5V], (instrs TBXv8i8Four, TBXv16i8Four)>;
+
+// ASIMD transfer, element to gen reg
+def : InstRW<[V3AEWrite_2c_2V], (instregex "^[SU]MOVv")>;
+
+// ASIMD transfer, gen reg to element
+// Same write as FMOVXDHighr in the FP miscellaneous section.
+def : InstRW<[V3AEWrite_5c_1M0_1V], (instregex "^INSvi(8|16|32|64)gpr$")>;
+
+// §3.20 ASIMD load instructions
+// -----------------------------------------------------------------------------
+
+// ASIMD load, 1 element, multiple, 1 reg, D-form
+def : InstRW<[V3AEWrite_6c_1L], (instregex "^LD1Onev(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_6c_1L],
+ (instregex "^LD1Onev(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 1 reg, Q-form
+def : InstRW<[V3AEWrite_6c_1L], (instregex "^LD1Onev(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_6c_1L],
+ (instregex "^LD1Onev(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 2 reg, D-form
+def : InstRW<[V3AEWrite_6c_2L], (instregex "^LD1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_6c_2L],
+ (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 2 reg, Q-form
+def : InstRW<[V3AEWrite_6c_2L], (instregex "^LD1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_6c_2L],
+ (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 3 reg, D-form
+def : InstRW<[V3AEWrite_6c_3L], (instregex "^LD1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_6c_3L],
+ (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 3 reg, Q-form
+def : InstRW<[V3AEWrite_6c_3L], (instregex "^LD1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_6c_3L],
+ (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 4 reg, D-form
+def : InstRW<[V3AEWrite_7c_4L], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_7c_4L],
+ (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 4 reg, Q-form
+def : InstRW<[V3AEWrite_7c_4L], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_7c_4L],
+ (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
+
+// All patterns below carry an explicit '^', matching the LD1{One,Two,Three,
+// Four}v entries earlier in this section. On the _POST forms WriteAdr is
+// listed first, ahead of the data write.
+
+// ASIMD load, 1 element, one lane, B/H/S
+// ASIMD load, 1 element, one lane, D
+def : InstRW<[V3AEWrite_8c_1L_1V], (instregex "^LD1i(8|16|32|64)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_8c_1L_1V], (instregex "^LD1i(8|16|32|64)_POST$")>;
+
+// ASIMD load, 1 element, all lanes, D-form, B/H/S
+// ASIMD load, 1 element, all lanes, D-form, D
+def : InstRW<[V3AEWrite_8c_1L_1V], (instregex "^LD1Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_8c_1L_1V], (instregex "^LD1Rv(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD load, 1 element, all lanes, Q-form
+def : InstRW<[V3AEWrite_8c_1L_1V], (instregex "^LD1Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_8c_1L_1V], (instregex "^LD1Rv(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 2 element, multiple, D-form, B/H/S
+def : InstRW<[V3AEWrite_8c_1L_2V], (instregex "^LD2Twov(8b|4h|2s)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_8c_1L_2V], (instregex "^LD2Twov(8b|4h|2s)_POST$")>;
+
+// ASIMD load, 2 element, multiple, Q-form, B/H/S
+// ASIMD load, 2 element, multiple, Q-form, D
+def : InstRW<[V3AEWrite_8c_2L_2V], (instregex "^LD2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_8c_2L_2V], (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 2 element, one lane, B/H
+// ASIMD load, 2 element, one lane, S
+// ASIMD load, 2 element, one lane, D
+def : InstRW<[V3AEWrite_8c_1L_2V], (instregex "^LD2i(8|16|32|64)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_8c_1L_2V], (instregex "^LD2i(8|16|32|64)_POST$")>;
+
+// ASIMD load, 2 element, all lanes, D-form, B/H/S
+// ASIMD load, 2 element, all lanes, D-form, D
+def : InstRW<[V3AEWrite_8c_1L_2V], (instregex "^LD2Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_8c_1L_2V], (instregex "^LD2Rv(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD load, 2 element, all lanes, Q-form
+def : InstRW<[V3AEWrite_8c_1L_2V], (instregex "^LD2Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_8c_1L_2V], (instregex "^LD2Rv(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 3 element, multiple, D-form, B/H/S
+def : InstRW<[V3AEWrite_8c_2L_3V], (instregex "^LD3Threev(8b|4h|2s)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_8c_2L_3V], (instregex "^LD3Threev(8b|4h|2s)_POST$")>;
+
+// ASIMD load, 3 element, multiple, Q-form, B/H/S
+// ASIMD load, 3 element, multiple, Q-form, D
+def : InstRW<[V3AEWrite_8c_3L_3V], (instregex "^LD3Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_8c_3L_3V], (instregex "^LD3Threev(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 3 element, one lane, B/H
+// ASIMD load, 3 element, one lane, S
+// ASIMD load, 3 element, one lane, D
+def : InstRW<[V3AEWrite_8c_2L_3V], (instregex "^LD3i(8|16|32|64)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_8c_2L_3V], (instregex "^LD3i(8|16|32|64)_POST$")>;
+
+// ASIMD load, 3 element, all lanes, D-form, B/H/S
+// ASIMD load, 3 element, all lanes, D-form, D
+def : InstRW<[V3AEWrite_8c_2L_3V], (instregex "^LD3Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_8c_2L_3V], (instregex "^LD3Rv(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD load, 3 element, all lanes, Q-form, B/H/S
+// ASIMD load, 3 element, all lanes, Q-form, D
+def : InstRW<[V3AEWrite_8c_3L_3V], (instregex "^LD3Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_8c_3L_3V], (instregex "^LD3Rv(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 4 element, multiple, D-form, B/H/S
+def : InstRW<[V3AEWrite_8c_3L_4V], (instregex "^LD4Fourv(8b|4h|2s)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_8c_3L_4V], (instregex "^LD4Fourv(8b|4h|2s)_POST$")>;
+
+// ASIMD load, 4 element, multiple, Q-form, B/H/S
+// ASIMD load, 4 element, multiple, Q-form, D
+def : InstRW<[V3AEWrite_9c_6L_4V], (instregex "^LD4Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_9c_6L_4V], (instregex "^LD4Fourv(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 4 element, one lane, B/H
+// ASIMD load, 4 element, one lane, S
+// ASIMD load, 4 element, one lane, D
+def : InstRW<[V3AEWrite_8c_3L_4V], (instregex "^LD4i(8|16|32|64)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_8c_3L_4V], (instregex "^LD4i(8|16|32|64)_POST$")>;
+
+// ASIMD load, 4 element, all lanes, D-form, B/H/S
+// ASIMD load, 4 element, all lanes, D-form, D
+def : InstRW<[V3AEWrite_8c_3L_4V], (instregex "^LD4Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_8c_3L_4V], (instregex "^LD4Rv(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD load, 4 element, all lanes, Q-form, B/H/S
+// ASIMD load, 4 element, all lanes, Q-form, D
+def : InstRW<[V3AEWrite_8c_4L_4V], (instregex "^LD4Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_8c_4L_4V], (instregex "^LD4Rv(16b|8h|4s|2d)_POST$")>;
+
+// §3.21 ASIMD store instructions
+// -----------------------------------------------------------------------------
+
+// All patterns below carry an explicit '^', matching the anchored style used
+// by the rest of this model. On the _POST forms WriteAdr is listed first,
+// ahead of the store write.
+
+// ASIMD store, 1 element, multiple, 1 reg, D-form
+def : InstRW<[V3AEWrite_2c_1SA_1V], (instregex "^ST1Onev(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_2c_1SA_1V], (instregex "^ST1Onev(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 1 reg, Q-form
+def : InstRW<[V3AEWrite_2c_1SA_1V], (instregex "^ST1Onev(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_2c_1SA_1V], (instregex "^ST1Onev(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 2 reg, D-form
+def : InstRW<[V3AEWrite_2c_1SA_1V], (instregex "^ST1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_2c_1SA_1V], (instregex "^ST1Twov(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 2 reg, Q-form
+def : InstRW<[V3AEWrite_2c_2SA_2V], (instregex "^ST1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_2c_2SA_2V], (instregex "^ST1Twov(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 3 reg, D-form
+def : InstRW<[V3AEWrite_2c_2SA_2V], (instregex "^ST1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_2c_2SA_2V], (instregex "^ST1Threev(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 3 reg, Q-form
+def : InstRW<[V3AEWrite_2c_3SA_3V], (instregex "^ST1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_2c_3SA_3V], (instregex "^ST1Threev(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 4 reg, D-form
+def : InstRW<[V3AEWrite_2c_2SA_2V], (instregex "^ST1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_2c_2SA_2V], (instregex "^ST1Fourv(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 4 reg, Q-form
+def : InstRW<[V3AEWrite_2c_4SA_4V], (instregex "^ST1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_2c_4SA_4V], (instregex "^ST1Fourv(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, one lane, B/H/S
+// ASIMD store, 1 element, one lane, D
+def : InstRW<[V3AEWrite_4c_1SA_2V], (instregex "^ST1i(8|16|32|64)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_4c_1SA_2V], (instregex "^ST1i(8|16|32|64)_POST$")>;
+
+// ASIMD store, 2 element, multiple, D-form, B/H/S
+def : InstRW<[V3AEWrite_4c_1SA_2V], (instregex "^ST2Twov(8b|4h|2s)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_4c_1SA_2V], (instregex "^ST2Twov(8b|4h|2s)_POST$")>;
+
+// ASIMD store, 2 element, multiple, Q-form, B/H/S
+// ASIMD store, 2 element, multiple, Q-form, D
+def : InstRW<[V3AEWrite_4c_2SA_4V], (instregex "^ST2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_4c_2SA_4V], (instregex "^ST2Twov(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 2 element, one lane, B/H/S
+// ASIMD store, 2 element, one lane, D
+def : InstRW<[V3AEWrite_4c_1SA_2V], (instregex "^ST2i(8|16|32|64)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_4c_1SA_2V], (instregex "^ST2i(8|16|32|64)_POST$")>;
+
+// ASIMD store, 3 element, multiple, D-form, B/H/S
+def : InstRW<[V3AEWrite_5c_2SA_4V], (instregex "^ST3Threev(8b|4h|2s)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_5c_2SA_4V], (instregex "^ST3Threev(8b|4h|2s)_POST$")>;
+
+// ASIMD store, 3 element, multiple, Q-form, B/H/S
+// ASIMD store, 3 element, multiple, Q-form, D
+def : InstRW<[V3AEWrite_6c_3SA_6V], (instregex "^ST3Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_6c_3SA_6V], (instregex "^ST3Threev(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 3 element, one lane, B/H
+// ASIMD store, 3 element, one lane, S
+// ASIMD store, 3 element, one lane, D
+def : InstRW<[V3AEWrite_5c_2SA_4V], (instregex "^ST3i(8|16|32|64)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_5c_2SA_4V], (instregex "^ST3i(8|16|32|64)_POST$")>;
+
+// ASIMD store, 4 element, multiple, D-form, B/H/S
+def : InstRW<[V3AEWrite_6c_2SA_6V], (instregex "^ST4Fourv(8b|4h|2s)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_6c_2SA_6V], (instregex "^ST4Fourv(8b|4h|2s)_POST$")>;
+
+// ASIMD store, 4 element, multiple, Q-form, B/H/S
+def : InstRW<[V3AEWrite_7c_4SA_12V], (instregex "^ST4Fourv(16b|8h|4s)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_7c_4SA_12V], (instregex "^ST4Fourv(16b|8h|4s)_POST$")>;
+
+// ASIMD store, 4 element, multiple, Q-form, D
+def : InstRW<[V3AEWrite_5c_4SA_8V], (instregex "^ST4Fourv(2d)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_5c_4SA_8V], (instregex "^ST4Fourv(2d)_POST$")>;
+
+// ASIMD store, 4 element, one lane, B/H/S
+def : InstRW<[V3AEWrite_6c_1SA_3V], (instregex "^ST4i(8|16|32)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_6c_1SA_3V], (instregex "^ST4i(8|16|32)_POST$")>;
+
+// ASIMD store, 4 element, one lane, D
+def : InstRW<[V3AEWrite_4c_2SA_4V], (instregex "^ST4i(64)$")>;
+def : InstRW<[WriteAdr, V3AEWrite_4c_2SA_4V], (instregex "^ST4i(64)_POST$")>;
+
+// §3.22 Cryptography extensions
+// -----------------------------------------------------------------------------
+
+// Crypto AES ops
+def : InstRW<[V3AEWrite_2c_1V], (instregex "^AES[DE]rr$", "^AESI?MCrr")>;
+
+// Crypto polynomial (64x64) multiply long
+def : InstRW<[V3AEWrite_2c_1V], (instrs PMULLv1i64, PMULLv2i64)>;
+
+// Crypto SHA1 hash acceleration op
+// Crypto SHA1 schedule acceleration ops
+def : InstRW<[V3AEWrite_2c_1V0], (instregex "^SHA1(H|SU0|SU1)")>;
+
+// Crypto SHA1 hash acceleration ops
+// Crypto SHA256 hash acceleration ops
+def : InstRW<[V3AEWrite_4c_1V0], (instregex "^SHA1[CMP]", "^SHA256H2?")>;
+
+// Crypto SHA256 schedule acceleration ops
+def : InstRW<[V3AEWrite_2c_1V0], (instregex "^SHA256SU[01]")>;
+
+// Crypto SHA512 hash acceleration ops
+def : InstRW<[V3AEWrite_2c_1V0], (instregex "^SHA512(H|H2|SU0|SU1)")>;
+
+// Crypto SHA3 ops
+def : InstRW<[V3AEWrite_2c_1V], (instrs BCAX, EOR3, RAX1, XAR)>;
+
+// Crypto SM3 ops
+def : InstRW<[V3AEWrite_2c_1V0], (instregex "^SM3PARTW[12]$", "^SM3SS1$",
+ "^SM3TT[12][AB]$")>;
+
+// Crypto SM4 ops
+def : InstRW<[V3AEWrite_4c_1V0], (instrs SM4E, SM4ENCKEY)>;
+
+// §3.23 CRC
+// -----------------------------------------------------------------------------
+
+// CRC checksum ops (every opcode whose name starts with CRC32)
+def : InstRW<[V3AEWr_CRC, V3AERd_CRC], (instregex "^CRC32")>;
+
+// §3.24 SVE Predicate instructions
+// -----------------------------------------------------------------------------
+
+// Loop control, based on predicate
+def : InstRW<[V3AEWrite_2or3c_1M], (instrs BRKA_PPmP, BRKA_PPzP,
+ BRKB_PPmP, BRKB_PPzP)>;
+
+// Loop control, based on predicate and flag setting
+def : InstRW<[V3AEWrite_2or3c_1M], (instrs BRKAS_PPzP, BRKBS_PPzP)>;
+
+// Loop control, propagating
+def : InstRW<[V3AEWrite_2or3c_1M], (instrs BRKN_PPzP, BRKPA_PPzPP,
+ BRKPB_PPzPP)>;
+
+// Loop control, propagating and flag setting
+def : InstRW<[V3AEWrite_2or3c_1M], (instrs BRKNS_PPzP, BRKPAS_PPzPP,
+ BRKPBS_PPzPP)>;
+
+// Loop control, based on GPR
+def : InstRW<[V3AEWrite_3c_2M],
+ (instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]")>;
+def : InstRW<[V3AEWrite_3c_2M], (instregex "^WHILE(RW|WR)_PXX_[BHSD]")>;
+
+// Loop terminate
+def : InstRW<[V3AEWrite_1c_2M], (instregex "^CTERM(EQ|NE)_(WW|XX)")>;
+
+// Predicate counting scalar
+def : InstRW<[V3AEWrite_2c_1M], (instrs ADDPL_XXI, ADDVL_XXI, RDVLI_XI)>;
+def : InstRW<[V3AEWrite_2c_1M],
+ (instregex "^(CNT|SQDEC|SQINC|UQDEC|UQINC)[BHWD]_XPiI",
+ "^SQ(DEC|INC)[BHWD]_XPiWdI",
+ "^UQ(DEC|INC)[BHWD]_WPiI")>;
+
+// Predicate counting scalar, ALL, {1,2,4}
+def : InstRW<[V3AEWrite_IncDec], (instregex "^(DEC|INC)[BHWD]_XPiI")>;
+
+// Predicate counting scalar, active predicate
+def : InstRW<[V3AEWrite_2c_1M],
+ (instregex "^CNTP_XPP_[BHSD]",
+ "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]",
+ "^(UQDEC|UQINC)P_WP_[BHSD]",
+ "^(SQDEC|SQINC)P_XPWd_[BHSD]")>;
+
+// Predicate counting vector, active predicate
+def : InstRW<[V3AEWrite_7c_1M_1M0_1V],
+ (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]")>;
+
+// Predicate logical
+def : InstRW<[V3AEWrite_1or2c_1M],
+ (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP")>;
+
+// Predicate logical, flag setting
+def : InstRW<[V3AEWrite_1or2c_1M],
+ (instregex "^(ANDS|BICS|EORS|NANDS|NORS|ORNS|ORRS)_PPzPP")>;
+
+// Predicate reverse
+def : InstRW<[V3AEWrite_2c_1M], (instregex "^REV_PP_[BHSD]")>;
+
+// Predicate select
+def : InstRW<[V3AEWrite_1c_1M], (instrs SEL_PPPP)>;
+
+// Predicate set
+def : InstRW<[V3AEWrite_2c_1M], (instregex "^PFALSE", "^PTRUE_[BHSD]")>;
+
+// Predicate set/initialize, set flags
+def : InstRW<[V3AEWrite_2c_1M], (instregex "^PTRUES_[BHSD]")>;
+
+// Predicate find first/next
+def : InstRW<[V3AEWrite_2c_1M], (instregex "^PFIRST_B", "^PNEXT_[BHSD]")>;
+
+// Predicate test
+def : InstRW<[V3AEWrite_1c_1M], (instrs PTEST_PP)>;
+
+// Predicate transpose
+def : InstRW<[V3AEWrite_2c_1M], (instregex "^TRN[12]_PPP_[BHSD]")>;
+
+// Predicate unpack and widen
+def : InstRW<[V3AEWrite_2c_1M], (instrs PUNPKHI_PP, PUNPKLO_PP)>;
+
+// Predicate zip/unzip
+def : InstRW<[V3AEWrite_2c_1M], (instregex "^(ZIP|UZP)[12]_PPP_[BHSD]")>;
+
+// §3.25 SVE integer instructions
+// -----------------------------------------------------------------------------
+
+// Arithmetic, absolute diff
+def : InstRW<[V3AEWrite_2c_1V], (instregex "^[SU]ABD_ZPmZ_[BHSD]",
+ "^[SU]ABD_ZPZZ_[BHSD]")>;
+
+// Arithmetic, absolute diff accum
+def : InstRW<[V3AEWr_ZA, V3AERd_ZA], (instregex "^[SU]ABA_ZZZ_[BHSD]")>;
+
+// Arithmetic, absolute diff accum long
+def : InstRW<[V3AEWr_ZA, V3AERd_ZA], (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]")>;
+
+// Arithmetic, absolute diff long
+def : InstRW<[V3AEWrite_2c_1V], (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]")>;
+
+// Arithmetic, basic
+def : InstRW<[V3AEWrite_2c_1V],
+ (instregex "^(ABS|ADD|CNOT|NEG|SUB|SUBR)_ZPmZ_[BHSD]",
+ "^(ADD|SUB)_ZZZ_[BHSD]",
+ "^(ADD|SUB|SUBR)_ZPZZ_[BHSD]",
+ "^(ADD|SUB|SUBR)_ZI_[BHSD]",
+ "^ADR_[SU]XTW_ZZZ_D_[0123]",
+ "^ADR_LSL_ZZZ_[SD]_[0123]",
+ "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]",
+ "^SADDLBT_ZZZ_[HSD]",
+ "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]",
+ "^SSUBL(BT|TB)_ZZZ_[HSD]")>;
+
+// Arithmetic, complex
+def : InstRW<[V3AEWrite_2c_1V],
+ (instregex "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]",
+ "^SQ(ABS|ADD|NEG|SUB|SUBR)_ZPmZ_[BHSD]",
+ "^[SU]Q(ADD|SUB)_ZZZ_[BHSD]",
+ "^[SU]Q(ADD|SUB)_ZI_[BHSD]",
+ "^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]",
+ "^(UQSUB|UQSUBR)_ZPmZ_[BHSD]")>;
+
+// Arithmetic, large integer
+def : InstRW<[V3AEWrite_2c_1V], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]")>;
+
+// Arithmetic, pairwise add
+def : InstRW<[V3AEWrite_2c_1V], (instregex "^ADDP_ZPmZ_[BHSD]")>;
+
+// Arithmetic, pairwise add and accum long
+def : InstRW<[V3AEWr_ZPA, ReadDefault, V3AERd_ZPA],
+ (instregex "^[SU]ADALP_ZPmZ_[HSD]")>;
+
+// Arithmetic, shift
+def : InstRW<[V3AEWrite_2c_1V1],
+ (instregex "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]",
+ "^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]",
+ "^(ASR|LSL|LSR)_ZPmI_[BHSD]",
+ "^(ASR|LSL|LSR)_ZPmZ_[BHSD]",
+ "^(ASR|LSL|LSR)_ZZI_[BHSD]",
+ "^(ASR|LSL|LSR)_ZPZ[IZ]_[BHSD]",
+ "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]")>;
+
+// Arithmetic, shift and accumulate
+def : InstRW<[V3AEWr_ZSA, V3AERd_ZSA], (instregex "^[SU]R?SRA_ZZI_[BHSD]")>;
+
+// Arithmetic, shift by immediate
+def : InstRW<[V3AEWrite_2c_1V], (instregex "^SHRN[BT]_ZZI_[BHS]",
+ "^[SU]SHLL[BT]_ZZI_[HSD]")>;
+
+// Arithmetic, shift by immediate and insert
+def : InstRW<[V3AEWrite_2c_1V], (instregex "^(SLI|SRI)_ZZI_[BHSD]")>;
+
+// Arithmetic, shift complex
+def : InstRW<[V3AEWrite_4c_1V],
+ (instregex "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]",
+ "^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_ZPmZ_[BHSD]",
+ "^[SU]QR?SHL_ZPZZ_[BHSD]",
+ "^(SQSHL|SQSHLU|UQSHL)_(ZPmI|ZPZI)_[BHSD]",
+ "^SQSHRU?N[BT]_ZZI_[BHS]",
+ "^UQR?SHRN[BT]_ZZI_[BHS]")>;
+
+// Arithmetic, shift right for divide
+def : InstRW<[V3AEWrite_4c_1V], (instregex "^ASRD_(ZPmI|ZPZI)_[BHSD]")>;
+
+// Arithmetic, shift rounding
+def : InstRW<[V3AEWrite_4c_1V], (instregex "^[SU]RSHLR?_ZPmZ_[BHSD]",
+ "^[SU]RSHL_ZPZZ_[BHSD]",
+ "^[SU]RSHR_(ZPmI|ZPZI)_[BHSD]")>;
+
+// Bit manipulation
+def : InstRW<[V3AEWrite_6c_2V1], (instregex "^(BDEP|BEXT|BGRP)_ZZZ_[BHSD]")>;
+
+// Bitwise select
+def : InstRW<[V3AEWrite_2c_1V], (instregex "^(BSL|BSL1N|BSL2N|NBSL)_ZZZZ")>;
+
+// Count/reverse bits
+def : InstRW<[V3AEWrite_2c_1V], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]")>;
+
+// Broadcast logical bitmask immediate to vector
+def : InstRW<[V3AEWrite_2c_1V], (instrs DUPM_ZI)>;
+
+// Compare and set flags
+def : InstRW<[V3AEWrite_2or3c_1V0],
+ (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]",
+ "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]")>;
+
+// Complex add
+def : InstRW<[V3AEWrite_2c_1V], (instregex "^(SQ)?CADD_ZZI_[BHSD]")>;
+
+// Complex dot product 8-bit element
+def : InstRW<[V3AEWr_ZDOTB, V3AERd_ZDOTB], (instrs CDOT_ZZZ_S, CDOT_ZZZI_S)>;
+
+// Complex dot product 16-bit element
+def : InstRW<[V3AEWr_ZDOTH, V3AERd_ZDOTH], (instrs CDOT_ZZZ_D, CDOT_ZZZI_D)>;
+
+// Complex multiply-add B, H, S element size
+def : InstRW<[V3AEWr_ZCMABHS, V3AERd_ZCMABHS], (instregex "^CMLA_ZZZ_[BHS]",
+ "^CMLA_ZZZI_[HS]")>;
+
+// Complex multiply-add D element size
+def : InstRW<[V3AEWr_ZCMAD, V3AERd_ZCMAD], (instrs CMLA_ZZZ_D)>;
+
+// Conditional extract operations, scalar form
+def : InstRW<[V3AEWrite_8c_1M0_1V], (instregex "^CLAST[AB]_RPZ_[BHSD]")>;
+
+// Conditional extract operations, SIMD&FP scalar and vector forms
+def : InstRW<[V3AEWrite_3c_1V1], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]",
+ "^COMPACT_ZPZ_[SD]",
+ "^SPLICE_ZPZZ?_[BHSD]")>;
+
+// Convert to floating point, 64b to float or convert to double
+def : InstRW<[V3AEWrite_3c_1V0], (instregex "^[SU]CVTF_ZPmZ_Dto[HSD]",
+ "^[SU]CVTF_ZPmZ_StoD")>;
+
+// Convert to floating point, 32b to single or half
+def : InstRW<[V3AEWrite_4c_2V0], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]")>;
+
+// Convert to floating point, 16b to half
+def : InstRW<[V3AEWrite_6c_4V0], (instregex "^[SU]CVTF_ZPmZ_HtoH")>;
+
+// Copy, scalar
+def : InstRW<[V3AEWrite_5c_1M0_1V], (instregex "^CPY_ZPmR_[BHSD]")>;
+
+// Copy, scalar SIMD&FP or imm
+def : InstRW<[V3AEWrite_2c_1V], (instregex "^CPY_ZPm[IV]_[BHSD]",
+ "^CPY_ZPzI_[BHSD]")>;
+
+// Divides, 32 bit
+def : InstRW<[V3AEWrite_12c_1V0], (instregex "^[SU]DIVR?_ZPmZ_S",
+ "^[SU]DIV_ZPZZ_S")>;
+
+// Divides, 64 bit
+def : InstRW<[V3AEWrite_20c_1V0], (instregex "^[SU]DIVR?_ZPmZ_D",
+ "^[SU]DIV_ZPZZ_D")>;
+
+// Dot product, 8 bit
+def : InstRW<[V3AEWr_ZDOTB, V3AERd_ZDOTB], (instregex "^[SU]DOT_ZZZI?_BtoS")>;
+
+// Dot product, 8 bit, using signed and unsigned integers
+def : InstRW<[V3AEWr_ZDOTB, V3AERd_ZDOTB], (instrs SUDOT_ZZZI, USDOT_ZZZI, USDOT_ZZZ)>;
+
+// Dot product, 16 bit
+def : InstRW<[V3AEWr_ZDOTH, V3AERd_ZDOTH], (instregex "^[SU]DOT_ZZZI?_HtoD")>;
+
+// Duplicate, immediate and indexed form
+def : InstRW<[V3AEWrite_2c_1V], (instregex "^DUP_ZI_[BHSD]",
+ "^DUP_ZZI_[BHSDQ]")>;
+
+// Duplicate, scalar form
+def : InstRW<[V3AEWrite_3c_1M0], (instregex "^DUP_ZR_[BHSD]")>;
+
+// Extend, sign or zero
+// Byte source widens to H/S/D, halfword source to S/D, word source to D only.
+def : InstRW<[V3AEWrite_2c_1V], (instregex "^[SU]XTB_ZPmZ_[HSD]",
+ "^[SU]XTH_ZPmZ_[SD]",
+ "^[SU]XTW_ZPmZ_D")>;
+
+// Extract
+def : InstRW<[V3AEWrite_2c_1V], (instrs EXT_ZZI, EXT_ZZI_CONSTRUCTIVE, EXT_ZZI_B)>;
+
+// Extract narrow saturating
+def : InstRW<[V3AEWrite_4c_1V], (instregex "^[SU]QXTN[BT]_ZZ_[BHS]",
+ "^SQXTUN[BT]_ZZ_[BHS]")>;
+
+// Extract operation, SIMD and FP scalar form
+def : InstRW<[V3AEWrite_3c_1V1], (instregex "^LAST[AB]_VPZ_[BHSD]")>;
+
+// Extract operation, scalar
+def : InstRW<[V3AEWrite_6c_1V1_1M0], (instregex "^LAST[AB]_RPZ_[BHSD]")>;
+
+// Histogram operations
+def : InstRW<[V3AEWrite_2c_1V], (instregex "^HISTCNT_ZPzZZ_[SD]",
+ "^HISTSEG_ZZZ")>;
+
+// Horizontal operations, B, H, S form, immediate operands only
+def : InstRW<[V3AEWrite_4c_1V0], (instregex "^INDEX_II_[BHS]")>;
+
+// Horizontal operations, B, H, S form, scalar, immediate operands / scalar
+// operands only / immediate, scalar operands
+def : InstRW<[V3AEWrite_7c_1M0_1V0], (instregex "^INDEX_(IR|RI|RR)_[BHS]")>;
+
+// Horizontal operations, D form, immediate operands only
+def : InstRW<[V3AEWrite_5c_2V0], (instrs INDEX_II_D)>;
+
+// Horizontal operations, D form, scalar, immediate operands / scalar operands
+// only / immediate, scalar operands
+def : InstRW<[V3AEWrite_8c_2M0_2V0], (instregex "^INDEX_(IR|RI|RR)_D")>;
+
+// Insert operation, SIMD and FP scalar form
+def : InstRW<[V3AEWrite_2c_1V], (instregex "^INSR_ZV_[BHSD]")>;
+
+// Insert operation, scalar
+def : InstRW<[V3AEWrite_5c_1V1_1M0], (instregex "^INSR_ZR_[BHSD]")>;
+
+// Logical
+// NOT has its own pattern at the end of the list, so it is not repeated in
+// the predicated (ZPmZ|ZPZZ) alternation.
+def : InstRW<[V3AEWrite_2c_1V],
+ (instregex "^(AND|EOR|ORR)_ZI",
+ "^(AND|BIC|EOR|ORR)_ZZZ",
+ "^EOR(BT|TB)_ZZZ_[BHSD]",
+ "^(AND|BIC|EOR|ORR)_(ZPmZ|ZPZZ)_[BHSD]",
+ "^NOT_ZPmZ_[BHSD]")>;
+
+// Max/min, basic and pairwise
+def : InstRW<[V3AEWrite_2c_1V], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]",
+ "^[SU](MAX|MIN)P?_ZPmZ_[BHSD]",
+ "^[SU](MAX|MIN)_ZPZZ_[BHSD]")>;
+
+// Matching operations
+// FIXME: SOG p. 44, n. 5: If the consuming instruction has a flag source, the
+// latency for this instruction is 4 cycles.
+def : InstRW<[V3AEWrite_2or3c_1V0_1M], (instregex "^N?MATCH_PPzZZ_[BH]")>;
+
+// Matrix multiply-accumulate
+def : InstRW<[V3AEWr_ZMMA, V3AERd_ZMMA], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>;
+
+// Move prefix
+def : InstRW<[V3AEWrite_2c_1V], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]",
+ "^MOVPRFX_ZZ")>;
+
+// Multiply, B, H, S element size
+def : InstRW<[V3AEWrite_4c_1V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]",
+ "^MUL_ZPZZ_[BHS]",
+ "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]",
+ "^[SU]MULH_ZPZZ_[BHS]")>;
+
+// Multiply, D element size
+def : InstRW<[V3AEWrite_5c_2V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D",
+ "^MUL_ZPZZ_D",
+ "^[SU]MULH_(ZPmZ|ZZZ)_D",
+ "^[SU]MULH_ZPZZ_D")>;
+
+// Multiply long
+def : InstRW<[V3AEWrite_4c_1V0], (instregex "^[SU]MULL[BT]_ZZZI_[SD]",
+ "^[SU]MULL[BT]_ZZZ_[HSD]")>;
+
+// Multiply accumulate, B, H, S element size
+def : InstRW<[V3AEWr_ZMABHS, V3AERd_ZMABHS],
+ (instregex "^ML[AS]_ZZZI_[HS]", "^ML[AS]_ZPZZZ_[BHS]")>;
+def : InstRW<[V3AEWr_ZMABHS, ReadDefault, V3AERd_ZMABHS],
+ (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]")>;
+
+// Multiply accumulate, D element size
+def : InstRW<[V3AEWr_ZMAD, V3AERd_ZMAD],
+ (instregex "^ML[AS]_ZZZI_D", "^ML[AS]_ZPZZZ_D")>;
+def : InstRW<[V3AEWr_ZMAD, ReadDefault, V3AERd_ZMAD],
+ (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_D")>;
+
+// Multiply accumulate long
+def : InstRW<[V3AEWr_ZMAL, V3AERd_ZMAL], (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]",
+ "^[SU]ML[AS]L[BT]_ZZZI_[SD]")>;
+
+// Multiply accumulate saturating doubling long regular
+def : InstRW<[V3AEWr_ZMASQL, V3AERd_ZMASQ],
+ (instregex "^SQDML[AS]L(B|T|BT)_ZZZ_[HSD]",
+ "^SQDML[AS]L[BT]_ZZZI_[SD]")>;
+
+// Multiply saturating doubling high, B, H, S element size
+def : InstRW<[V3AEWrite_4c_1V0], (instregex "^SQDMULH_ZZZ_[BHS]",
+ "^SQDMULH_ZZZI_[HS]")>;
+
+// Multiply saturating doubling high, D element size
+def : InstRW<[V3AEWrite_5c_2V0], (instrs SQDMULH_ZZZ_D, SQDMULH_ZZZI_D)>;
+
+// Multiply saturating doubling long
+def : InstRW<[V3AEWrite_4c_1V0], (instregex "^SQDMULL[BT]_ZZZ_[HSD]",
+ "^SQDMULL[BT]_ZZZI_[SD]")>;
+
+// Multiply saturating rounding doubling regular/complex accumulate, B, H, S
+// element size
+def : InstRW<[V3AEWr_ZMASQBHS, V3AERd_ZMASQ], (instregex "^SQRDML[AS]H_ZZZ_[BHS]",
+ "^SQRDCMLAH_ZZZ_[BHS]",
+ "^SQRDML[AS]H_ZZZI_[HS]",
+ "^SQRDCMLAH_ZZZI_[HS]")>;
+
+// Multiply saturating rounding doubling regular/complex accumulate, D element
+// size
+def : InstRW<[V3AEWr_ZMASQD, V3AERd_ZMASQ], (instregex "^SQRDML[AS]H_ZZZI?_D",
+ "^SQRDCMLAH_ZZZ_D")>;
+
+// Multiply saturating rounding doubling regular/complex, B, H, S element size
+def : InstRW<[V3AEWrite_4c_1V0], (instregex "^SQRDMULH_ZZZ_[BHS]",
+ "^SQRDMULH_ZZZI_[HS]")>;
+
+// Multiply saturating rounding doubling regular/complex, D element size
+def : InstRW<[V3AEWrite_5c_2V0], (instregex "^SQRDMULH_ZZZI?_D")>;
+
+// Multiply/multiply long, (8x8) polynomial
+def : InstRW<[V3AEWrite_2c_1V], (instregex "^PMUL_ZZZ_B",
+ "^PMULL[BT]_ZZZ_[HDQ]")>;
+
+// Predicate counting vector
+def : InstRW<[V3AEWrite_2c_1V], (instregex "^([SU]Q)?(DEC|INC)[HWD]_ZPiI")>;
+
+// Reciprocal estimate
+def : InstRW<[V3AEWrite_4c_2V0], (instregex "^URECPE_ZPmZ_S", "^URSQRTE_ZPmZ_S")>;
+
+// Reduction, arithmetic, B form
+def : InstRW<[V3AEWrite_9c_2V_4V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>;
+
+// Reduction, arithmetic, H form
+def : InstRW<[V3AEWrite_8c_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_H")>;
+
+// Reduction, arithmetic, S form
+def : InstRW<[V3AEWrite_6c_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_S")>;
+
+// Reduction, arithmetic, D form
+def : InstRW<[V3AEWrite_4c_2V], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>;
+
+// Reduction, logical
+def : InstRW<[V3AEWrite_6c_1V_1V1], (instregex "^(AND|EOR|OR)V_VPZ_[BHSD]")>;
+
+// Reverse, vector
+def : InstRW<[V3AEWrite_2c_1V], (instregex "^REV_ZZ_[BHSD]",
+ "^REVB_ZPmZ_[HSD]",
+ "^REVH_ZPmZ_[SD]",
+ "^REVW_ZPmZ_D")>;
+
+// Select, vector form
+def : InstRW<[V3AEWrite_2c_1V], (instregex "^SEL_ZPZZ_[BHSD]")>;
+
+// Table lookup
+def : InstRW<[V3AEWrite_2c_1V], (instregex "^TBL_ZZZZ?_[BHSD]")>;
+
+// Table lookup extension
+def : InstRW<[V3AEWrite_2c_1V], (instregex "^TBX_ZZZ_[BHSD]")>;
+
+// Transpose, vector form
+def : InstRW<[V3AEWrite_2c_1V], (instregex "^TRN[12]_ZZZ_[BHSDQ]")>;
+
+// Unpack and extend
+def : InstRW<[V3AEWrite_2c_1V], (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]")>;
+
+// Zip/unzip
+def : InstRW<[V3AEWrite_2c_1V], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]")>;
+
+// §3.26 SVE floating-point instructions
+// -----------------------------------------------------------------------------
+
+// Floating point absolute value/difference
+// One pattern per opcode form: FABD merging, FABD ZPZZ, and FABS merging.
+def : InstRW<[V3AEWrite_2c_1V], (instregex "^FABD_ZPmZ_[HSD]",
+ "^FABD_ZPZZ_[HSD]",
+ "^FABS_ZPmZ_[HSD]")>;
+
+// Floating point arithmetic
+def : InstRW<[V3AEWrite_2c_1V], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]",
+ "^F(ADD|SUB)_ZPZ[IZ]_[HSD]",
+ "^FADDP_ZPmZZ_[HSD]",
+ "^FNEG_ZPmZ_[HSD]",
+ "^FSUBR_ZPm[IZ]_[HSD]",
+ "^FSUBR_(ZPZI|ZPZZ)_[HSD]")>;
+
+// Floating point associative add, F16
+def : InstRW<[V3AEWrite_10c_1V1_9rc], (instrs FADDA_VPZ_H)>;
+
+// Floating point associative add, F32
+def : InstRW<[V3AEWrite_6c_1V1_5rc], (instrs FADDA_VPZ_S)>;
+
+// Floating point associative add, F64
+def : InstRW<[V3AEWrite_4c_1V], (instrs FADDA_VPZ_D)>;
+
+// Floating point compare
+def : InstRW<[V3AEWrite_2c_1V0], (instregex "^FACG[ET]_PPzZZ_[HSD]",
+ "^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]",
+ "^FCM(LE|LT)_PPzZ0_[HSD]",
+ "^FCMUO_PPzZZ_[HSD]")>;
+
+// Floating point complex add
+def : InstRW<[V3AEWrite_3c_1V], (instregex "^FCADD_ZPmZ_[HSD]")>;
+
+// Floating point complex multiply add
+def : InstRW<[V3AEWr_ZFCMA, ReadDefault, V3AERd_ZFCMA], (instregex "^FCMLA_ZPmZZ_[HSD]")>;
+def : InstRW<[V3AEWr_ZFCMA, V3AERd_ZFCMA], (instregex "^FCMLA_ZZZI_[HS]")>;
+
+// Floating point convert, long or narrow (F16 to F32 or F32 to F16)
+def : InstRW<[V3AEWrite_4c_2V0], (instregex "^FCVT_ZPmZ_(HtoS|StoH)",
+ "^FCVTLT_ZPmZ_HtoS",
+ "^FCVTNT_ZPmZ_StoH")>;
+
+// Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32
+// or F64 to F16)
+def : InstRW<[V3AEWrite_3c_1V0], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)",
+ "^FCVTLT_ZPmZ_StoD",
+ "^FCVTNT_ZPmZ_DtoS")>;
+
+// Floating point convert, round to odd
+def : InstRW<[V3AEWrite_3c_1V0], (instrs FCVTX_ZPmZ_DtoS, FCVTXNT_ZPmZ_DtoS)>;
+
+// Floating point base2 log, F16
+def : InstRW<[V3AEWrite_6c_4V0], (instregex "^FLOGB_(ZPmZ|ZPZZ)_H")>;
+
+// Floating point base2 log, F32
+def : InstRW<[V3AEWrite_4c_2V0], (instregex "^FLOGB_(ZPmZ|ZPZZ)_S")>;
+
+// Floating point base2 log, F64
+def : InstRW<[V3AEWrite_3c_1V0], (instregex "^FLOGB_(ZPmZ|ZPZZ)_D")>;
+
+// Floating point convert to integer, F16
+def : InstRW<[V3AEWrite_6c_4V0], (instregex "^FCVTZ[SU]_ZPmZ_HtoH")>;
+
+// Floating point convert to integer, F32
+def : InstRW<[V3AEWrite_4c_2V0], (instregex "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)")>;
+
+// Floating point convert to integer, F64
+def : InstRW<[V3AEWrite_3c_1V0],
+ (instregex "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)")>;
+
+// Floating point copy
+def : InstRW<[V3AEWrite_2c_1V], (instregex "^FCPY_ZPmI_[HSD]",
+ "^FDUP_ZI_[HSD]")>;
+
+// Floating point divide, F16
+def : InstRW<[V3AEWrite_13c_1V1_8rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_H")>;
+
+// Floating point divide, F32
+def : InstRW<[V3AEWrite_11c_1V1_4rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_S")>;
+
+// Floating point divide, F64
+def : InstRW<[V3AEWrite_14c_1V1_2rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_D")>;
+
+// Floating point min/max pairwise
+def : InstRW<[V3AEWrite_2c_1V], (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]")>;
+
+// Floating point min/max
+def : InstRW<[V3AEWrite_2c_1V], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]",
+ "^F(MAX|MIN)(NM)?_ZPZ[IZ]_[HSD]")>;
+
+// Floating point multiply
+def : InstRW<[V3AEWrite_3c_1V], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]",
+ "^FMULX_ZPZZ_[HSD]",
+ "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]",
+ "^FMUL_ZPZ[IZ]_[HSD]")>;
+
+// Floating point multiply accumulate
+def : InstRW<[V3AEWr_ZFMA, ReadDefault, V3AERd_ZFMA],
+ (instregex "^FN?ML[AS]_ZPmZZ_[HSD]",
+ "^FN?(MAD|MSB)_ZPmZZ_[HSD]")>;
+def : InstRW<[V3AEWr_ZFMA, V3AERd_ZFMA],
+ (instregex "^FML[AS]_ZZZI_[HSD]",
+ "^FN?ML[AS]_ZPZZZ_[HSD]")>;
+
+// Floating point multiply add/sub accumulate long
+def : InstRW<[V3AEWr_ZFMAL, V3AERd_ZFMAL], (instregex "^FML[AS]L[BT]_ZZZI?_SHH")>;
+
+// Floating point reciprocal estimate, F16
+def : InstRW<[V3AEWrite_6c_4V0], (instregex "^FR(ECP|SQRT)E_ZZ_H", "^FRECPX_ZPmZ_H")>;
+
+// Floating point reciprocal estimate, F32
+def : InstRW<[V3AEWrite_4c_2V0], (instregex "^FR(ECP|SQRT)E_ZZ_S", "^FRECPX_ZPmZ_S")>;
+
+// Floating point reciprocal estimate, F64
+def : InstRW<[V3AEWrite_3c_1V0], (instregex "^FR(ECP|SQRT)E_ZZ_D", "^FRECPX_ZPmZ_D")>;
+
+// Floating point reciprocal step
+def : InstRW<[V3AEWrite_4c_1V], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]")>;
+
+// Floating point reduction, F16
+def : InstRW<[V3AEWrite_8c_4V],
+ (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_H")>;
+
+// Floating point reduction, F32
+def : InstRW<[V3AEWrite_6c_3V],
+ (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_S")>;
+
+// Floating point reduction, F64
+def : InstRW<[V3AEWrite_4c_2V],
+ (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_D")>;
+
+// Floating point round to integral, F16
+def : InstRW<[V3AEWrite_6c_4V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H")>;
+
+// Floating point round to integral, F32
+def : InstRW<[V3AEWrite_4c_2V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S")>;
+
+// Floating point round to integral, F64
+def : InstRW<[V3AEWrite_3c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D")>;
+
+// Floating point square root, F16
+def : InstRW<[V3AEWrite_13c_1V1_8rc], (instregex "^FSQRT_ZPmZ_H")>;
+
+// Floating point square root, F32
+def : InstRW<[V3AEWrite_11c_1V1_4rc], (instregex "^FSQRT_ZPmZ_S")>;
+
+// Floating point square root, F64
+def : InstRW<[V3AEWrite_14c_1V1_2rc], (instregex "^FSQRT_ZPmZ_D")>;
+
+// Floating point trigonometric exponentiation
+def : InstRW<[V3AEWrite_3c_1V1], (instregex "^FEXPA_ZZ_[HSD]")>;
+
+// Floating point trigonometric multiply add
+def : InstRW<[V3AEWrite_4c_1V], (instregex "^FTMAD_ZZI_[HSD]")>;
+
+// Floating point trigonometric, miscellaneous
+def : InstRW<[V3AEWrite_3c_1V], (instregex "^FTS(MUL|SEL)_ZZZ_[HSD]")>;
+
+// §3.27 SVE BFloat16 (BF16) instructions
+// -----------------------------------------------------------------------------
+
+// Convert, F32 to BF16
+def : InstRW<[V3AEWrite_4c_1V], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>;
+
+// Dot product
+def : InstRW<[V3AEWr_ZBFDOT, V3AERd_ZBFDOT], (instrs BFDOT_ZZI, BFDOT_ZZZ)>;
+
+// Matrix multiply accumulate
+def : InstRW<[V3AEWr_ZBFMMA, V3AERd_ZBFMMA], (instrs BFMMLA_ZZZ_HtoS)>;
+
+// Multiply accumulate long
+def : InstRW<[V3AEWr_ZBFMAL, V3AERd_ZBFMAL], (instregex "^BFMLAL[BT]_ZZZI?")>;
+
+// §3.28 SVE Load instructions
+// -----------------------------------------------------------------------------
+
+// Load vector
+def : InstRW<[V3AEWrite_6c_1L], (instrs LDR_ZXI)>;
+
+// Load predicate
+def : InstRW<[V3AEWrite_6c_1L_1M], (instrs LDR_PXI)>;
+
+// Contiguous load, scalar + imm
+def : InstRW<[V3AEWrite_6c_1L], (instregex "^LD1[BHWD]_IMM$",
+ "^LD1S?B_[HSD]_IMM$",
+ "^LD1S?H_[SD]_IMM$",
+ "^LD1S?W_D_IMM$" )>;
+// Contiguous load, scalar + scalar
+def : InstRW<[V3AEWrite_6c_1L], (instregex "^LD1[BHWD]$",
+ "^LD1S?B_[HSD]$",
+ "^LD1S?H_[SD]$",
+ "^LD1S?W_D$" )>;
+
+// Contiguous load broadcast, scalar + imm
+def : InstRW<[V3AEWrite_6c_1L], (instregex "^LD1R[BHWD]_IMM$",
+ "^LD1RS?B_[HSD]_IMM$",
+ "^LD1RS?H_[SD]_IMM$",
+ "^LD1RW_D_IMM$",
+ "^LD1RSW_IMM$",
+ "^LD1RQ_[BHWD]_IMM$")>;
+
+// Contiguous load broadcast, scalar + scalar
+def : InstRW<[V3AEWrite_6c_1L], (instregex "^LD1RQ_[BHWD]$")>;
+
+// Non temporal load, scalar + imm
+// Non temporal load, scalar + scalar
+def : InstRW<[V3AEWrite_6c_1L], (instregex "^LDNT1[BHWD]_ZR[IR]$")>;
+
+// Non temporal gather load, vector + scalar 32-bit element size
+def : InstRW<[V3AEWrite_9c_2L_4V], (instregex "^LDNT1[BHW]_ZZR_S$",
+ "^LDNT1S[BH]_ZZR_S$")>;
+
+// Non temporal gather load, vector + scalar 64-bit element size
+def : InstRW<[V3AEWrite_9c_2L_2V], (instregex "^LDNT1S?[BHW]_ZZR_D$")>;
+def : InstRW<[V3AEWrite_9c_2L_2V], (instrs LDNT1D_ZZR_D)>;
+
+// Contiguous first faulting load, scalar + scalar
+def : InstRW<[V3AEWrite_6c_1L_1I], (instregex "^LDFF1[BHWD]$",
+ "^LDFF1S?B_[HSD]$",
+ "^LDFF1S?H_[SD]$",
+ "^LDFF1S?W_D$")>;
+
+// Contiguous non faulting load, scalar + imm
+def : InstRW<[V3AEWrite_6c_1L], (instregex "^LDNF1[BHWD]_IMM$",
+ "^LDNF1S?B_[HSD]_IMM$",
+ "^LDNF1S?H_[SD]_IMM$",
+ "^LDNF1S?W_D_IMM$")>;
+
+// Contiguous Load two structures to two vectors, scalar + imm
+def : InstRW<[V3AEWrite_8c_2L_2V], (instregex "^LD2[BHWD]_IMM$")>;
+
+// Contiguous Load two structures to two vectors, scalar + scalar
+def : InstRW<[V3AEWrite_9c_2L_2V_2I], (instregex "^LD2[BHWD]$")>;
+
+// Contiguous Load three structures to three vectors, scalar + imm
+def : InstRW<[V3AEWrite_9c_3L_3V], (instregex "^LD3[BHWD]_IMM$")>;
+
+// Contiguous Load three structures to three vectors, scalar + scalar
+def : InstRW<[V3AEWrite_10c_3V_3L_3I], (instregex "^LD3[BHWD]$")>;
+
+// Contiguous Load four structures to four vectors, scalar + imm
+def : InstRW<[V3AEWrite_9c_4L_8V], (instregex "^LD4[BHWD]_IMM$")>;
+
+// Contiguous Load four structures to four vectors, scalar + scalar
+def : InstRW<[V3AEWrite_10c_4L_8V_4I], (instregex "^LD4[BHWD]$")>;
+
+// Gather load, vector + imm, 32-bit element size
+def : InstRW<[V3AEWrite_9c_1L_4V], (instregex "^GLD(FF)?1S?[BH]_S_IMM$",
+ "^GLD(FF)?1W_IMM$")>;
+
+// Gather load, vector + imm, 64-bit element size
+def : InstRW<[V3AEWrite_9c_1L_4V], (instregex "^GLD(FF)?1S?[BHW]_D_IMM$",
+ "^GLD(FF)?1D_IMM$")>;
+
+// Gather load, 32-bit scaled offset
+def : InstRW<[V3AEWrite_10c_1L_8V],
+ (instregex "^GLD(FF)?1S?H_S_[SU]XTW_SCALED$",
+ "^GLD(FF)?1W_[SU]XTW_SCALED")>;
+
+// Gather load, 64-bit scaled offset
+// NOTE: These instructions are not specified in the SOG.
+def : InstRW<[V3AEWrite_10c_1L_4V],
+ (instregex "^GLD(FF)?1S?[HW]_D_([SU]XTW_)?SCALED$",
+ "^GLD(FF)?1D_([SU]XTW_)?SCALED$")>;
+
+// Gather load, 32-bit unpacked unscaled offset
+def : InstRW<[V3AEWrite_9c_1L_4V], (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW$",
+ "^GLD(FF)?1W_[SU]XTW$")>;
+
+// Gather load, 64-bit unpacked unscaled offset
+// NOTE: These instructions are not specified in the SOG.
+def : InstRW<[V3AEWrite_9c_1L_2V],
+ (instregex "^GLD(FF)?1S?[BHW]_D(_[SU]XTW)?$",
+ "^GLD(FF)?1D(_[SU]XTW)?$")>;
+
+// §3.29 SVE Store instructions
+// -----------------------------------------------------------------------------
+
+// Store from predicate reg
+def : InstRW<[V3AEWrite_1c_1SA], (instrs STR_PXI)>;
+
+// Store from vector reg
+def : InstRW<[V3AEWrite_2c_1SA_1V], (instrs STR_ZXI)>;
+
+// Contiguous store, scalar + imm
+def : InstRW<[V3AEWrite_2c_1SA_1V], (instregex "^ST1[BHWD]_IMM$",
+ "^ST1B_[HSD]_IMM$",
+ "^ST1H_[SD]_IMM$",
+ "^ST1W_D_IMM$")>;
+
+// Contiguous store, scalar + scalar
+def : InstRW<[V3AEWrite_2c_1SA_1I_1V], (instregex "^ST1H(_[SD])?$")>;
+def : InstRW<[V3AEWrite_2c_1SA_1V], (instregex "^ST1[BWD]$",
+ "^ST1B_[HSD]$",
+ "^ST1W_D$")>;
+
+// Contiguous store two structures from two vectors, scalar + imm
+def : InstRW<[V3AEWrite_4c_1SA_1V], (instregex "^ST2[BHWD]_IMM$")>;
+
+// Contiguous store two structures from two vectors, scalar + scalar
+def : InstRW<[V3AEWrite_4c_2SA_2I_2V], (instrs ST2H)>;
+def : InstRW<[V3AEWrite_4c_2SA_2V], (instregex "^ST2[BWD]$")>;
+
+// Contiguous store three structures from three vectors, scalar + imm
+def : InstRW<[V3AEWrite_7c_9SA_9V], (instregex "^ST3[BHWD]_IMM$")>;
+
+// Contiguous store three structures from three vectors, scalar + scalar
+def : InstRW<[V3AEWrite_7c_9SA_9I_9V], (instregex "^ST3[BHWD]$")>;
+
+// Contiguous store four structures from four vectors, scalar + imm
+def : InstRW<[V3AEWrite_11c_18SA_18V], (instregex "^ST4[BHWD]_IMM$")>;
+
+// Contiguous store four structures from four vectors, scalar + scalar
+def : InstRW<[V3AEWrite_11c_18SA_18I_18V], (instregex "^ST4[BHWD]$")>;
+
+// Non temporal store, scalar + imm
+def : InstRW<[V3AEWrite_2c_1SA_1V], (instregex "^STNT1[BHWD]_ZRI$")>;
+
+// Non temporal store, scalar + scalar
+def : InstRW<[V3AEWrite_2c_1SA_1I_1V], (instrs STNT1H_ZRR)>;
+def : InstRW<[V3AEWrite_2c_1SA_1V], (instregex "^STNT1[BWD]_ZRR$")>;
+
+// Scatter non temporal store, vector + scalar 32-bit element size
+def : InstRW<[V3AEWrite_4c_4SA_4V], (instregex "^STNT1[BHW]_ZZR_S")>;
+
+// Scatter non temporal store, vector + scalar 64-bit element size
+def : InstRW<[V3AEWrite_2c_2SA_2V], (instregex "^STNT1[BHWD]_ZZR_D")>;
+
+// Scatter store vector + imm 32-bit element size
+def : InstRW<[V3AEWrite_4c_4SA_4V], (instregex "^SST1[BH]_S_IMM$",
+ "^SST1W_IMM$")>;
+
+// Scatter store vector + imm 64-bit element size
+def : InstRW<[V3AEWrite_2c_2SA_2V], (instregex "^SST1[BHW]_D_IMM$",
+ "^SST1D_IMM$")>;
+
+// Scatter store, 32-bit scaled offset
+def : InstRW<[V3AEWrite_4c_4SA_4V],
+ (instregex "^SST1(H_S|W)_[SU]XTW_SCALED$")>;
+
+// Scatter store, 32-bit unpacked unscaled offset
+def : InstRW<[V3AEWrite_2c_2SA_2V], (instregex "^SST1[BHW]_D_[SU]XTW$",
+ "^SST1D_[SU]XTW$")>;
+
+// Scatter store, 32-bit unpacked scaled offset
+def : InstRW<[V3AEWrite_2c_2SA_2V], (instregex "^SST1[HW]_D_[SU]XTW_SCALED$",
+ "^SST1D_[SU]XTW_SCALED$")>;
+
+// Scatter store, 32-bit unscaled offset
+def : InstRW<[V3AEWrite_4c_4SA_4V], (instregex "^SST1[BH]_S_[SU]XTW$",
+ "^SST1W_[SU]XTW$")>;
+
+// Scatter store, 64-bit scaled offset
+def : InstRW<[V3AEWrite_2c_2SA_2V], (instregex "^SST1[HW]_D_SCALED$",
+ "^SST1D_SCALED$")>;
+
+// Scatter store, 64-bit unscaled offset
+def : InstRW<[V3AEWrite_2c_2SA_2V], (instregex "^SST1[BHW]_D$",
+ "^SST1D$")>;
+
+// §3.30 SVE Miscellaneous instructions
+// -----------------------------------------------------------------------------
+
+// Read first fault register, unpredicated
+def : InstRW<[V3AEWrite_2c_1M0], (instrs RDFFR_P)>;
+
+// Read first fault register, predicated
+def : InstRW<[V3AEWrite_3or4c_1M0_1M], (instrs RDFFR_PPz)>;
+
+// Read first fault register and set flags
+def : InstRW<[V3AEWrite_3or4c_1M0_1M], (instrs RDFFRS_PPz)>;
+
+// Set first fault register
+// Write to first fault register
+def : InstRW<[V3AEWrite_2c_1M0], (instrs SETFFR, WRFFR)>;
+
+// Prefetch
+// NOTE: This is not specified in the SOG.
+def : InstRW<[V3AEWrite_4c_1L], (instregex "^PRF[BHWD]")>;
+
+// §3.31 SVE Cryptographic instructions
+// -----------------------------------------------------------------------------
+
+// Crypto AES ops
+def : InstRW<[V3AEWrite_2c_1V], (instregex "^AES[DE]_ZZZ_B$",
+ "^AESI?MC_ZZ_B$")>;
+
+// Crypto SHA3 ops
+def : InstRW<[V3AEWrite_2c_1V], (instregex "^(BCAX|EOR3)_ZZZZ$",
+ "^RAX1_ZZZ_D$",
+ "^XAR_ZZZI_[BHSD]$")>;
+
+// Crypto SM4 ops
+def : InstRW<[V3AEWrite_4c_1V0], (instregex "^SM4E(KEY)?_ZZZ_S$")>;
+
+}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 5b80b08..068954f 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -764,8 +764,8 @@ bool AArch64PassConfig::addGlobalInstructionSelect() {
}
void AArch64PassConfig::addMachineSSAOptimization() {
- if (EnableNewSMEABILowering && TM->getOptLevel() != CodeGenOptLevel::None)
- addPass(createMachineSMEABIPass());
+ if (TM->getOptLevel() != CodeGenOptLevel::None && EnableNewSMEABILowering)
+ addPass(createMachineSMEABIPass(TM->getOptLevel()));
if (TM->getOptLevel() != CodeGenOptLevel::None && EnableSMEPeepholeOpt)
addPass(createSMEPeepholeOptPass());
@@ -798,7 +798,7 @@ bool AArch64PassConfig::addILPOpts() {
void AArch64PassConfig::addPreRegAlloc() {
if (TM->getOptLevel() == CodeGenOptLevel::None && EnableNewSMEABILowering)
- addPass(createMachineSMEABIPass());
+ addPass(createMachineSMEABIPass(CodeGenOptLevel::None));
// Change dead register definitions to refer to the zero register.
if (TM->getOptLevel() != CodeGenOptLevel::None &&
diff --git a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
index 434ea67..7cb5003 100644
--- a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
+++ b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
@@ -121,8 +121,10 @@ struct InstInfo {
/// Contains the needed ZA state for each instruction in a block. Instructions
/// that do not require a ZA state are not recorded.
struct BlockInfo {
- ZAState FixedEntryState{ZAState::ANY};
SmallVector<InstInfo> Insts;
+ ZAState FixedEntryState{ZAState::ANY};
+ ZAState DesiredIncomingState{ZAState::ANY};
+ ZAState DesiredOutgoingState{ZAState::ANY};
LiveRegs PhysLiveRegsAtEntry = LiveRegs::None;
LiveRegs PhysLiveRegsAtExit = LiveRegs::None;
};
@@ -175,10 +177,15 @@ private:
Register AgnosticZABufferPtr = AArch64::NoRegister;
};
+/// Checks if \p State is a legal edge bundle state. For a state to be a legal
+/// bundle state, it must be possible to transition from it to any other bundle
+/// state without losing any ZA state. This is the case for ACTIVE/LOCAL_SAVED,
+/// as you can transition between those states by saving/restoring ZA. The OFF
+/// state would not be legal, as transitioning to it drops the content of ZA.
static bool isLegalEdgeBundleZAState(ZAState State) {
switch (State) {
- case ZAState::ACTIVE:
- case ZAState::LOCAL_SAVED:
+ case ZAState::ACTIVE: // ZA state within the accumulator/ZT0.
+ case ZAState::LOCAL_SAVED: // ZA state is saved on the stack.
return true;
default:
return false;
@@ -238,7 +245,8 @@ getZAStateBeforeInst(const TargetRegisterInfo &TRI, MachineInstr &MI,
struct MachineSMEABI : public MachineFunctionPass {
inline static char ID = 0;
- MachineSMEABI() : MachineFunctionPass(ID) {}
+ MachineSMEABI(CodeGenOptLevel OptLevel = CodeGenOptLevel::Default)
+ : MachineFunctionPass(ID), OptLevel(OptLevel) {}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -267,6 +275,11 @@ struct MachineSMEABI : public MachineFunctionPass {
const EdgeBundles &Bundles,
ArrayRef<ZAState> BundleStates);
+ /// Propagates desired states forwards (from predecessors -> successors) if
+ /// \p Forwards, otherwise, propagates backwards (from successors ->
+ /// predecessors).
+ void propagateDesiredStates(FunctionInfo &FnInfo, bool Forwards = true);
+
// Emission routines for private and shared ZA functions (using lazy saves).
void emitNewZAPrologue(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI);
@@ -335,12 +348,15 @@ struct MachineSMEABI : public MachineFunctionPass {
MachineBasicBlock::iterator MBBI, DebugLoc DL);
private:
+ CodeGenOptLevel OptLevel = CodeGenOptLevel::Default;
+
MachineFunction *MF = nullptr;
const AArch64Subtarget *Subtarget = nullptr;
const AArch64RegisterInfo *TRI = nullptr;
const AArch64FunctionInfo *AFI = nullptr;
const TargetInstrInfo *TII = nullptr;
MachineRegisterInfo *MRI = nullptr;
+ MachineLoopInfo *MLI = nullptr;
};
static LiveRegs getPhysLiveRegs(LiveRegUnits const &LiveUnits) {
@@ -422,12 +438,69 @@ FunctionInfo MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) {
// Reverse vector (as we had to iterate backwards for liveness).
std::reverse(Block.Insts.begin(), Block.Insts.end());
+
+ // Record the desired states on entry/exit of this block. These are the
+ // states that would not incur a state transition.
+ if (!Block.Insts.empty()) {
+ Block.DesiredIncomingState = Block.Insts.front().NeededState;
+ Block.DesiredOutgoingState = Block.Insts.back().NeededState;
+ }
}
return FunctionInfo{std::move(Blocks), AfterSMEProloguePt,
PhysLiveRegsAfterSMEPrologue};
}
+void MachineSMEABI::propagateDesiredStates(FunctionInfo &FnInfo,
+ bool Forwards) {
+ // If `Forwards`, a block's desired incoming state is derived from its
+ // predecessors' desired outgoing states; otherwise its desired outgoing
+ // state is derived from its successors' desired incoming states.
+ auto GetBlockState = [](BlockInfo &Block, bool Incoming) -> ZAState & {
+ return Incoming ? Block.DesiredIncomingState : Block.DesiredOutgoingState;
+ };
+
+ SmallVector<MachineBasicBlock *> Worklist;
+ for (auto [BlockID, BlockInfo] : enumerate(FnInfo.Blocks)) {
+ if (!isLegalEdgeBundleZAState(GetBlockState(BlockInfo, Forwards)))
+ Worklist.push_back(MF->getBlockNumbered(BlockID));
+ }
+
+ while (!Worklist.empty()) {
+ MachineBasicBlock *MBB = Worklist.pop_back_val();
+ BlockInfo &Block = FnInfo.Blocks[MBB->getNumber()];
+
+ // Pick the legal edge bundle state that is most common among the
+ // predecessors (if `Forwards`) or successors (otherwise) of this block.
+ int StateCounts[ZAState::NUM_ZA_STATE] = {0}; // Votes, indexed by ZAState.
+ for (MachineBasicBlock *PredOrSucc :
+ Forwards ? predecessors(MBB) : successors(MBB)) {
+ BlockInfo &PredOrSuccBlock = FnInfo.Blocks[PredOrSucc->getNumber()];
+ ZAState ZAState = GetBlockState(PredOrSuccBlock, !Forwards); // NOTE: this local shadows the type name.
+ if (isLegalEdgeBundleZAState(ZAState))
+ StateCounts[ZAState]++;
+ }
+
+ ZAState PropagatedState = ZAState(max_element(StateCounts) - StateCounts); // First maximum wins ties; with no votes this is ZAState(0) (presumably ANY - TODO confirm).
+ ZAState &CurrentState = GetBlockState(Block, Forwards);
+ if (PropagatedState != CurrentState) {
+ CurrentState = PropagatedState;
+ ZAState &OtherState = GetBlockState(Block, !Forwards);
+ // Also fill in this block's opposite-side state if it is still "ANY".
+ if (OtherState == ZAState::ANY)
+ OtherState = PropagatedState;
+ // This block changed, so revisit any successors/predecessors that still
+ // lack a legal state by pushing them to the worklist.
+ for (MachineBasicBlock *SuccOrPred :
+ Forwards ? successors(MBB) : predecessors(MBB)) {
+ BlockInfo &SuccOrPredBlock = FnInfo.Blocks[SuccOrPred->getNumber()];
+ if (!isLegalEdgeBundleZAState(GetBlockState(SuccOrPredBlock, Forwards)))
+ Worklist.push_back(SuccOrPred);
+ }
+ }
+ }
+}
+
/// Assigns each edge bundle a ZA state based on the needed states of blocks
/// that have incoming or outgoing edges in that bundle.
SmallVector<ZAState>
@@ -440,40 +513,36 @@ MachineSMEABI::assignBundleZAStates(const EdgeBundles &Bundles,
// Attempt to assign a ZA state for this bundle that minimizes state
// transitions. Edges within loops are given a higher weight as we assume
// they will be executed more than once.
- // TODO: We should propagate desired incoming/outgoing states through blocks
- // that have the "ANY" state first to make better global decisions.
int EdgeStateCounts[ZAState::NUM_ZA_STATE] = {0};
for (unsigned BlockID : Bundles.getBlocks(I)) {
LLVM_DEBUG(dbgs() << "- bb." << BlockID);
const BlockInfo &Block = FnInfo.Blocks[BlockID];
- if (Block.Insts.empty()) {
- LLVM_DEBUG(dbgs() << " (no state preference)\n");
- continue;
- }
bool InEdge = Bundles.getBundle(BlockID, /*Out=*/false) == I;
bool OutEdge = Bundles.getBundle(BlockID, /*Out=*/true) == I;
- ZAState DesiredIncomingState = Block.Insts.front().NeededState;
- if (InEdge && isLegalEdgeBundleZAState(DesiredIncomingState)) {
- EdgeStateCounts[DesiredIncomingState]++;
+ bool LegalInEdge =
+ InEdge && isLegalEdgeBundleZAState(Block.DesiredIncomingState);
+ bool LegalOutEgde =
+ OutEdge && isLegalEdgeBundleZAState(Block.DesiredOutgoingState);
+ if (LegalInEdge) {
LLVM_DEBUG(dbgs() << " DesiredIncomingState: "
- << getZAStateString(DesiredIncomingState));
+ << getZAStateString(Block.DesiredIncomingState));
+ EdgeStateCounts[Block.DesiredIncomingState]++;
}
- ZAState DesiredOutgoingState = Block.Insts.back().NeededState;
- if (OutEdge && isLegalEdgeBundleZAState(DesiredOutgoingState)) {
- EdgeStateCounts[DesiredOutgoingState]++;
+ if (LegalOutEgde) {
LLVM_DEBUG(dbgs() << " DesiredOutgoingState: "
- << getZAStateString(DesiredOutgoingState));
+ << getZAStateString(Block.DesiredOutgoingState));
+ EdgeStateCounts[Block.DesiredOutgoingState]++;
}
+ if (!LegalInEdge && !LegalOutEgde)
+ LLVM_DEBUG(dbgs() << " (no state preference)");
LLVM_DEBUG(dbgs() << '\n');
}
ZAState BundleState =
ZAState(max_element(EdgeStateCounts) - EdgeStateCounts);
- // Force ZA to be active in bundles that don't have a preferred state.
- // TODO: Something better here (to avoid extra mode switches).
if (BundleState == ZAState::ANY)
BundleState = ZAState::ACTIVE;
@@ -918,6 +987,43 @@ bool MachineSMEABI::runOnMachineFunction(MachineFunction &MF) {
getAnalysis<EdgeBundlesWrapperLegacy>().getEdgeBundles();
FunctionInfo FnInfo = collectNeededZAStates(SMEFnAttrs);
+
+ if (OptLevel != CodeGenOptLevel::None) {
+ // Propagate desired states forward, then backwards. Most of the propagation
+ // should be done in the forward step, and backwards propagation is then
+ // used to fill in the gaps. Note: Doing both in one step can give poor
+ // results. For example, consider this subgraph:
+ //
+ // ┌─────┐
+ // ┌─┤ BB0 ◄───┐
+ // │ └─┬───┘ │
+ // │ ┌─▼───◄──┐│
+ // │ │ BB1 │ ││
+ // │ └─┬┬──┘ ││
+ // │ │└─────┘│
+ // │ ┌─▼───┐ │
+ // │ │ BB2 ├───┘
+ // │ └─┬───┘
+ // │ ┌─▼───┐
+ // └─► BB3 │
+ // └─────┘
+ //
+ // If:
+ // - "BB0" and "BB2" (outer loop) have no state preference
+ // - "BB1" (inner loop) desires the ACTIVE state on entry/exit
+ // - "BB3" desires the LOCAL_SAVED state on entry
+ //
+ // If we propagate forwards first, ACTIVE is propagated from BB1 to BB2,
+ // then from BB2 to BB0, which results in the inner and outer loops having
+ // the "ACTIVE" state. This avoids any state changes in the loops.
+ //
+ // If we propagate backwards first, we _could_ propagate LOCAL_SAVED from
+ // BB3 to BB0, which would result in a transition from ACTIVE -> LOCAL_SAVED
+ // in the outer loop.
+ for (bool Forwards : {true, false})
+ propagateDesiredStates(FnInfo, Forwards);
+ }
+
SmallVector<ZAState> BundleStates = assignBundleZAStates(Bundles, FnInfo);
EmitContext Context;
@@ -941,4 +1047,6 @@ bool MachineSMEABI::runOnMachineFunction(MachineFunction &MF) {
return true;
}
-FunctionPass *llvm::createMachineSMEABIPass() { return new MachineSMEABI(); }
+FunctionPass *llvm::createMachineSMEABIPass(CodeGenOptLevel OptLevel) {
+ return new MachineSMEABI(OptLevel);
+}
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index a911e7e..52cc4ca 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -3267,29 +3267,103 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) {
return false;
assert(!ST.hasExtendedWaitCounts());
- if (!ST.isWave64() || !SIInstrInfo::isSALU(*MI))
+ if (!ST.isWave64())
+ return false;
+
+ const bool IsSALU = SIInstrInfo::isSALU(*MI);
+ const bool IsVALU = SIInstrInfo::isVALU(*MI);
+ if (!IsSALU && !IsVALU)
return false;
// The hazard sequence is three instructions:
// 1. VALU reads SGPR as mask
- // 2. SALU writes SGPR
- // 3. SALU reads SGPR
- // The hazard can expire if the distance between 2 and 3 is sufficient.
- // In practice this happens <10% of the time, hence this always assumes
- // the hazard exists if 1 and 2 are present to avoid searching.
+ // 2. VALU/SALU writes SGPR
+ // 3. VALU/SALU reads SGPR
+ // The hazard can expire if the distance between 2 and 3 is sufficient,
+ // or (2) is VALU and (3) is SALU.
+ // In practice this happens <10% of the time, hence always assume the hazard
+ // exists if (1) and (2) are present to avoid searching all SGPR reads.
- const MachineOperand *SDSTOp = TII.getNamedOperand(*MI, AMDGPU::OpName::sdst);
- if (!SDSTOp || !SDSTOp->isReg())
- return false;
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ auto IgnoreableSGPR = [](const Register Reg) {
+ switch (Reg) {
+ case AMDGPU::EXEC:
+ case AMDGPU::EXEC_LO:
+ case AMDGPU::EXEC_HI:
+ case AMDGPU::M0:
+ case AMDGPU::SGPR_NULL:
+ case AMDGPU::SGPR_NULL64:
+ case AMDGPU::SCC:
+ return true;
+ default:
+ return false;
+ }
+ };
+ auto IsVCC = [](const Register Reg) {
+ return Reg == AMDGPU::VCC || Reg == AMDGPU::VCC_LO || Reg == AMDGPU::VCC_HI;
+ };
+
+ struct StateType {
+ SmallSet<Register, 2> HazardSGPRs;
+
+ static unsigned getHashValue(const StateType &State) {
+ return hash_combine_range(State.HazardSGPRs);
+ }
+ static bool isEqual(const StateType &LHS, const StateType &RHS) {
+ return LHS.HazardSGPRs == RHS.HazardSGPRs;
+ }
+ };
+
+ SmallVector<const MachineInstr *> WaitInstrs;
+ bool HasSGPRRead = false;
+ StateType InitialState;
+
+ // Look for SGPR write.
+ MachineOperand *HazardDef = nullptr;
+ for (MachineOperand &Op : MI->operands()) {
+ if (!Op.isReg())
+ continue;
+ if (Op.isDef() && HazardDef)
+ continue;
+
+ Register Reg = Op.getReg();
+ if (IgnoreableSGPR(Reg))
+ continue;
+ if (!IsVCC(Reg)) {
+ if (Op.isImplicit())
+ continue;
+ if (!TRI->isSGPRReg(MRI, Reg))
+ continue;
+ }
+ // Also check for SGPR reads.
+ if (Op.isUse()) {
+ HasSGPRRead = true;
+ continue;
+ }
+
+ assert(!HazardDef);
+ HazardDef = &Op;
+ }
- const Register HazardReg = SDSTOp->getReg();
- if (HazardReg == AMDGPU::EXEC ||
- HazardReg == AMDGPU::EXEC_LO ||
- HazardReg == AMDGPU::EXEC_HI ||
- HazardReg == AMDGPU::M0)
+ if (!HazardDef)
return false;
- auto IsHazardFn = [HazardReg, this](const MachineInstr &I) {
+ // Set up to track writes to individual SGPRs
+ const Register HazardReg = HazardDef->getReg();
+ if (AMDGPU::SReg_32RegClass.contains(HazardReg)) {
+ InitialState.HazardSGPRs.insert(HazardReg);
+ } else {
+ assert(AMDGPU::SReg_64RegClass.contains(HazardReg));
+ InitialState.HazardSGPRs.insert(TRI->getSubReg(HazardReg, AMDGPU::sub0));
+ InitialState.HazardSGPRs.insert(TRI->getSubReg(HazardReg, AMDGPU::sub1));
+ }
+
+ auto IsHazardFn = [&](StateType &State, const MachineInstr &I) {
+ if (State.HazardSGPRs.empty())
+ return HazardExpired;
+
switch (I.getOpcode()) {
case AMDGPU::V_ADDC_U32_e32:
case AMDGPU::V_ADDC_U32_dpp:
@@ -3304,11 +3378,10 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) {
case AMDGPU::V_SUBB_U32_e32:
case AMDGPU::V_SUBB_U32_dpp:
case AMDGPU::V_SUBBREV_U32_e32:
- case AMDGPU::V_SUBBREV_U32_dpp:
+ case AMDGPU::V_SUBBREV_U32_dpp: {
// These implicitly read VCC as mask source.
- return HazardReg == AMDGPU::VCC ||
- HazardReg == AMDGPU::VCC_LO ||
- HazardReg == AMDGPU::VCC_HI;
+ return IsVCC(HazardReg) ? HazardFound : NoHazardFound;
+ }
case AMDGPU::V_ADDC_U32_e64:
case AMDGPU::V_ADDC_U32_e64_dpp:
case AMDGPU::V_CNDMASK_B16_t16_e64:
@@ -3324,68 +3397,109 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) {
// Only check mask register overlaps.
const MachineOperand *SSRCOp = TII.getNamedOperand(I, AMDGPU::OpName::src2);
assert(SSRCOp);
- return TRI.regsOverlap(SSRCOp->getReg(), HazardReg);
+ bool Result = TRI->regsOverlap(SSRCOp->getReg(), HazardReg);
+ return Result ? HazardFound : NoHazardFound;
}
default:
- return false;
+ return NoHazardFound;
}
};
- const MachineRegisterInfo &MRI = MF.getRegInfo();
- auto IsExpiredFn = [&MRI, this](const MachineInstr &I, int) {
- // s_waitcnt_depctr sa_sdst(0) mitigates hazard.
- if (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
- AMDGPU::DepCtr::decodeFieldSaSdst(I.getOperand(0).getImm()) == 0)
- return true;
-
- // VALU access to any SGPR or literal constant other than HazardReg
- // mitigates hazard. No need to check HazardReg here as this will
- // only be called when !IsHazardFn.
- if (!SIInstrInfo::isVALU(I))
- return false;
- for (int OpNo = 0, End = I.getNumOperands(); OpNo < End; ++OpNo) {
- const MachineOperand &Op = I.getOperand(OpNo);
- if (Op.isReg()) {
- Register OpReg = Op.getReg();
- // Only consider uses
- if (!Op.isUse())
+ const unsigned ConstantMaskBits = AMDGPU::DepCtr::encodeFieldSaSdst(
+ AMDGPU::DepCtr::encodeFieldVaSdst(AMDGPU::DepCtr::encodeFieldVaVcc(0), 0),
+ 0);
+ auto UpdateStateFn = [&](StateType &State, const MachineInstr &I) {
+ switch (I.getOpcode()) {
+ case AMDGPU::S_WAITCNT_DEPCTR:
+ // Record mergeable waits within region of instructions free of SGPR reads.
+ if (!HasSGPRRead && I.getParent() == MI->getParent() && !I.isBundled() &&
+ (I.getOperand(0).getImm() & ConstantMaskBits) == ConstantMaskBits)
+ WaitInstrs.push_back(&I);
+ break;
+ default:
+ // Update tracking of SGPR reads and writes.
+ for (auto &Op : I.operands()) {
+ if (!Op.isReg())
continue;
- // Ignore EXEC
- if (OpReg == AMDGPU::EXEC ||
- OpReg == AMDGPU::EXEC_LO ||
- OpReg == AMDGPU::EXEC_HI)
+
+ Register Reg = Op.getReg();
+ if (IgnoreableSGPR(Reg))
continue;
- // Ignore all implicit uses except VCC
- if (Op.isImplicit()) {
- if (OpReg == AMDGPU::VCC ||
- OpReg == AMDGPU::VCC_LO ||
- OpReg == AMDGPU::VCC_HI)
- return true;
+ if (!IsVCC(Reg)) {
+ if (Op.isImplicit())
+ continue;
+ if (!TRI->isSGPRReg(MRI, Reg))
+ continue;
+ }
+ if (Op.isUse()) {
+ HasSGPRRead = true;
continue;
}
- if (TRI.isSGPRReg(MRI, OpReg))
- return true;
- } else {
- const MCInstrDesc &InstDesc = I.getDesc();
- const MCOperandInfo &OpInfo = InstDesc.operands()[OpNo];
- if (!TII.isInlineConstant(Op, OpInfo))
- return true;
+
+ // Stop tracking any SGPRs with writes on the basis that they will
+ // already have an appropriate wait inserted afterwards.
+ SmallVector<Register, 2> Found;
+ for (Register SGPR : State.HazardSGPRs) {
+ if (Reg == SGPR || TRI->regsOverlap(Reg, SGPR))
+ Found.push_back(SGPR);
+ }
+ for (Register SGPR : Found)
+ State.HazardSGPRs.erase(SGPR);
}
+ break;
}
- return false;
};
// Check for hazard
- if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
- std::numeric_limits<int>::max())
+ if (!hasHazard<StateType>(InitialState, IsHazardFn, UpdateStateFn,
+ MI->getParent(),
+ std::next(MI->getReverseIterator())))
return false;
- auto NextMI = std::next(MI->getIterator());
+ // Compute counter mask
+ unsigned DepCtr =
+ IsVALU ? (IsVCC(HazardReg) ? AMDGPU::DepCtr::encodeFieldVaVcc(0)
+ : AMDGPU::DepCtr::encodeFieldVaSdst(0))
+ : AMDGPU::DepCtr::encodeFieldSaSdst(0);
+
+ // Try to merge previous waits into this one for regions with no SGPR reads.
+ if (!WaitInstrs.empty()) {
+ // Note: WaitInstrs contains const pointers, so walk backward from MI to
+ // obtain a mutable pointer to each instruction to be merged.
+ // This is expected to be a very short walk within the same block.
+ SmallVector<MachineInstr *> ToErase;
+ unsigned Found = 0;
+ for (MachineBasicBlock::reverse_iterator It = MI->getReverseIterator(),
+ End = MI->getParent()->rend();
+ Found < WaitInstrs.size() && It != End; ++It) {
+ MachineInstr *WaitMI = &*It;
+ // Find next wait instruction.
+ if (std::as_const(WaitMI) != WaitInstrs[Found])
+ continue;
+ Found++;
+ unsigned WaitMask = WaitMI->getOperand(0).getImm();
+ assert((WaitMask & ConstantMaskBits) == ConstantMaskBits);
+ DepCtr = AMDGPU::DepCtr::encodeFieldSaSdst(
+ DepCtr, std::min(AMDGPU::DepCtr::decodeFieldSaSdst(WaitMask),
+ AMDGPU::DepCtr::decodeFieldSaSdst(DepCtr)));
+ DepCtr = AMDGPU::DepCtr::encodeFieldVaSdst(
+ DepCtr, std::min(AMDGPU::DepCtr::decodeFieldVaSdst(WaitMask),
+ AMDGPU::DepCtr::decodeFieldVaSdst(DepCtr)));
+ DepCtr = AMDGPU::DepCtr::encodeFieldVaVcc(
+ DepCtr, std::min(AMDGPU::DepCtr::decodeFieldVaVcc(WaitMask),
+ AMDGPU::DepCtr::decodeFieldVaVcc(DepCtr)));
+ ToErase.push_back(WaitMI);
+ }
+ assert(Found == WaitInstrs.size());
+ for (MachineInstr *WaitMI : ToErase)
+ WaitMI->eraseFromParent();
+ }
- // Add s_waitcnt_depctr sa_sdst(0) after SALU write.
+ // Add s_waitcnt_depctr after SGPR write.
+ auto NextMI = std::next(MI->getIterator());
auto NewMI = BuildMI(*MI->getParent(), NextMI, MI->getDebugLoc(),
TII.get(AMDGPU::S_WAITCNT_DEPCTR))
- .addImm(AMDGPU::DepCtr::encodeFieldSaSdst(0));
+ .addImm(DepCtr);
// SALU write may be s_getpc in a bundle.
updateGetPCBundle(NewMI);
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 313ae3d..fdba454 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1298,12 +1298,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Custom);
}
- // Use __sincos_stret if available.
- if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
- getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
- setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
- setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
- }
+ setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
// FP-ARMv8 implements a lot of rounding-like FP operations.
if (Subtarget->hasFPARMv8Base()) {
@@ -9835,13 +9831,18 @@ static SDValue LowerUADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG) {
}
SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
- assert(Subtarget->isTargetDarwin());
-
// For iOS, we want to call an alternative entry point: __sincos_stret,
// return values are passed via sret.
SDLoc dl(Op);
SDValue Arg = Op.getOperand(0);
EVT ArgVT = Arg.getValueType();
+ RTLIB::Libcall LC = RTLIB::getSINCOS_STRET(ArgVT);
+ RTLIB::LibcallImpl SincosStret = getLibcallImpl(LC);
+ if (SincosStret == RTLIB::Unsupported)
+ return SDValue();
+
+ assert(Subtarget->isTargetDarwin());
+
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
auto PtrVT = getPointerTy(DAG.getDataLayout());
@@ -9871,11 +9872,9 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
Args.emplace_back(Arg, ArgTy);
- RTLIB::Libcall LC =
- (ArgVT == MVT::f64) ? RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32;
- const char *LibcallName = getLibcallName(LC);
- CallingConv::ID CC = getLibcallCallingConv(LC);
- SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL));
+ StringRef LibcallName = getLibcallImplName(SincosStret);
+ CallingConv::ID CC = getLibcallImplCallingConv(SincosStret);
+ SDValue Callee = DAG.getExternalSymbol(LibcallName.data(), getPointerTy(DL));
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index e46a393..8720460 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -904,6 +904,8 @@ public:
case Intrinsic::dx_resource_casthandle:
// NOTE: llvm.dbg.value is supported as is in DXIL.
case Intrinsic::dbg_value:
+ // NOTE: llvm.assume is supported as is in DXIL.
+ case Intrinsic::assume:
case Intrinsic::not_intrinsic:
if (F.use_empty())
F.eraseFromParent();
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index 54c8972..0573f64 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -1061,8 +1061,11 @@ HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl,
SDValue W0 = isUndef(PredV)
? DAG.getUNDEF(MVT::i64)
: DAG.getNode(HexagonISD::P2D, dl, MVT::i64, PredV);
- Words[IdxW].push_back(HiHalf(W0, DAG));
- Words[IdxW].push_back(LoHalf(W0, DAG));
+ if (Bytes < BitBytes) {
+ Words[IdxW].push_back(HiHalf(W0, DAG));
+ Words[IdxW].push_back(LoHalf(W0, DAG));
+ } else
+ Words[IdxW].push_back(W0);
while (Bytes < BitBytes) {
IdxW ^= 1;
@@ -1083,7 +1086,26 @@ HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl,
Bytes *= 2;
}
+ while (Bytes > BitBytes) {
+ IdxW ^= 1;
+ Words[IdxW].clear();
+
+ if (Bytes <= 4) {
+ for (const SDValue &W : Words[IdxW ^ 1]) {
+ SDValue T = contractPredicate(W, dl, DAG);
+ Words[IdxW].push_back(T);
+ }
+ } else {
+ for (const SDValue &W : Words[IdxW ^ 1]) {
+ Words[IdxW].push_back(W);
+ }
+ }
+ Bytes /= 2;
+ }
+
assert(Bytes == BitBytes);
+ if (BitBytes == 1 && PredTy == MVT::v2i1)
+ ByteTy = MVT::getVectorVT(MVT::i16, HwLen);
SDValue Vec = ZeroFill ? getZero(dl, ByteTy, DAG) : DAG.getUNDEF(ByteTy);
SDValue S4 = DAG.getConstant(HwLen-4, dl, MVT::i32);
@@ -3157,6 +3179,9 @@ SDValue
HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const {
auto *MemN = cast<MemSDNode>(Op.getNode());
+ if (!MemN->getMemoryVT().isSimple())
+ return Op;
+
MVT MemTy = MemN->getMemoryVT().getSimpleVT();
if (!isHvxPairTy(MemTy))
return Op;
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 17f04d0..20fc849 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -138,6 +138,11 @@ static cl::opt<unsigned> PPCMinimumJumpTableEntries(
"ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
cl::desc("Set minimum number of entries to use a jump table on PPC"));
+static cl::opt<unsigned> PPCMinimumBitTestCmps(
+ "ppc-min-bit-test-cmps", cl::init(3), cl::Hidden,
+ cl::desc("Set minimum of largest number of comparisons to use bit test for "
+ "switch on PPC."));
+
static cl::opt<unsigned> PPCGatherAllAliasesMaxDepth(
"ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
cl::desc("max depth when checking alias info in GatherAllAliases()"));
@@ -1436,6 +1441,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// Re-evaluate this value on future HWs that can do better with mtctr.
setMinimumJumpTableEntries(PPCMinimumJumpTableEntries);
+ // The default minimum of largest number in a BitTest cluster is 3.
+ setMinimumBitTestCmps(PPCMinimumBitTestCmps);
+
setMinFunctionAlignment(Align(4));
setMinCmpXchgSizeInBits(Subtarget.hasPartwordAtomics() ? 8 : 32);
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFuture.td b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
index b0bed71c..da3efdc 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFuture.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
@@ -194,6 +194,22 @@ class XX3Form_XTAB6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = XT{5};
}
+class XForm_RBS5<bits<6> opCode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opCode, OOL, IOL, asmstr, itin> {
+
+ bits<5> RB;
+ bits<5> RS;
+
+ let Pattern = pattern;
+
+ let Inst{6...10} = RS;
+ let Inst{11...15} = 0;
+ let Inst{16...20} = RB;
+ let Inst{21...30} = xo;
+ let Inst{31} = 0;
+}
+
class XX3Form_XTAB6_S<bits<5> xo, dag OOL, dag IOL, string asmstr,
list<dag> pattern>
: I<59, OOL, IOL, asmstr, NoItinerary> {
@@ -317,12 +333,16 @@ let Predicates = [IsISAFuture] in {
def TLBIEIO
: XForm_RSB5_UIMM2<31, 18, (outs), (ins g8rc:$RB, g8rc:$RS, u2imm:$RIC),
"tlbieio $RB, $RS, $RIC", []>;
+ def MTLPL : XForm_RBS5<31, 275, (outs), (ins gprc:$RB, gprc:$RS),
+ "mtlpl $RB, $RS", IIC_SprMTSPR, []>;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
def TLBIEP8
: XForm_RSB5_UIMM2_2UIMM1<31, 50, (outs),
(ins g8rc:$RB, g8rc:$RS, u2imm:$RIC,
u1imm:$PRS, u1imm:$R),
"tlbiep $RB, $RS, $RIC, $PRS, $R", []>;
+ def MTLPL8 : XForm_RBS5<31, 275, (outs), (ins g8rc:$RB, g8rc:$RS),
+ "mtlpl $RB, $RS", IIC_SprMTSPR, []>, isPPC64;
}
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 410f20e..b86020a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2572,11 +2572,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
// Combine sin / cos into _sincos_stret if it is available.
- if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
- getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
- setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
- setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
- }
+ setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
if (Subtarget.isTargetWin64()) {
setOperationAction(ISD::SDIV, MVT::i128, Custom);
@@ -33067,26 +33064,30 @@ static SDValue LowerADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG) {
static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Arg = Op.getOperand(0);
+ EVT ArgVT = Arg.getValueType();
+ bool isF64 = ArgVT == MVT::f64;
+
+ RTLIB::Libcall LC = isF64 ? RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32;
+ const char *LibcallName = TLI.getLibcallName(LC);
+ if (!LibcallName)
+ return SDValue();
+
assert(Subtarget.isTargetDarwin() && Subtarget.is64Bit());
// For MacOSX, we want to call an alternative entry point: __sincos_stret,
// which returns the values as { float, float } (in XMM0) or
// { double, double } (which is returned in XMM0, XMM1).
SDLoc dl(Op);
- SDValue Arg = Op.getOperand(0);
- EVT ArgVT = Arg.getValueType();
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
TargetLowering::ArgListTy Args;
Args.emplace_back(Arg, ArgTy);
- bool isF64 = ArgVT == MVT::f64;
// Only optimize x86_64 for now. i386 is a bit messy. For f32,
// the small struct {f32, f32} is returned in (eax, edx). For f64,
// the results are returned via SRet in memory.
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- RTLIB::Libcall LC = isF64 ? RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32;
- const char *LibcallName = TLI.getLibcallName(LC);
SDValue Callee =
DAG.getExternalSymbol(LibcallName, TLI.getPointerTy(DAG.getDataLayout()));
@@ -54634,6 +54635,7 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
SDValue Src = N->getOperand(0);
+ EVT SrcVT = Src.getValueType();
SDLoc DL(N);
// Attempt to pre-truncate inputs to arithmetic ops instead.
@@ -54652,6 +54654,39 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combinePMULH(Src, VT, DL, DAG, Subtarget))
return V;
+ // Fold trunc(srl(load(p),amt)) -> load(p+amt/8)
+ // If we're shifting down byte aligned bit chunks from a larger load for
+ // truncation, see if we can convert the shift into a pointer offset instead.
+ // Limit this to normal (non-ext) scalar integer loads.
+ if (SrcVT.isScalarInteger() && Src.getOpcode() == ISD::SRL &&
+ Src.hasOneUse() && Src.getOperand(0).hasOneUse() &&
+ ISD::isNormalLoad(Src.getOperand(0).getNode())) {
+ auto *Ld = cast<LoadSDNode>(Src.getOperand(0));
+ if (Ld->isSimple() && VT.isByteSized() &&
+ isPowerOf2_64(VT.getSizeInBits())) {
+ SDValue ShAmt = Src.getOperand(1);
+ KnownBits KnownAmt = DAG.computeKnownBits(ShAmt);
+ // Check the shift amount is byte aligned.
+ // Check the truncation doesn't use any shifted in (zero) top bits.
+ if (KnownAmt.countMinTrailingZeros() >= 3 &&
+ KnownAmt.getMaxValue().ule(SrcVT.getSizeInBits() -
+ VT.getSizeInBits())) {
+ EVT PtrVT = Ld->getBasePtr().getValueType();
+ SDValue PtrBitOfs = DAG.getZExtOrTrunc(ShAmt, DL, PtrVT);
+ SDValue PtrByteOfs =
+ DAG.getNode(ISD::SRL, DL, PtrVT, PtrBitOfs,
+ DAG.getShiftAmountConstant(3, PtrVT, DL));
+ SDValue NewPtr = DAG.getMemBasePlusOffset(
+ Ld->getBasePtr(), PtrByteOfs, DL, SDNodeFlags::NoUnsignedWrap);
+ SDValue NewLoad =
+ DAG.getLoad(VT, DL, Ld->getChain(), NewPtr, Ld->getMemOperand());
+ DAG.ReplaceAllUsesOfValueWith(Src.getOperand(0).getValue(1),
+ NewLoad.getValue(1));
+ return NewLoad;
+ }
+ }
+ }
+
// The bitcast source is a direct mmx result.
// Detect bitcasts between i32 to x86mmx
if (Src.getOpcode() == ISD::BITCAST && VT == MVT::i32) {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 8d9933b..92fca90 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3496,7 +3496,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
if (isPowerOf2_64(AlignMask + 1)) {
uint64_t Offset = 0;
match(A, m_Add(m_Value(A), m_ConstantInt(Offset)));
- if (match(A, m_PtrToInt(m_Value(A)))) {
+ if (match(A, m_PtrToIntOrAddr(m_Value(A)))) {
/// Note: this doesn't preserve the offset information but merges
/// offset and alignment.
/// TODO: we can generate a GEP instead of merging the alignment with
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index f939e7a..614c6eb 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -2148,7 +2148,7 @@ Instruction *InstCombinerImpl::visitIntToPtr(IntToPtrInst &CI) {
return nullptr;
}
-Value *InstCombinerImpl::foldPtrToIntOfGEP(Type *IntTy, Value *Ptr) {
+Value *InstCombinerImpl::foldPtrToIntOrAddrOfGEP(Type *IntTy, Value *Ptr) {
// Look through chain of one-use GEPs.
Type *PtrTy = Ptr->getType();
SmallVector<GEPOperator *> GEPs;
@@ -2210,7 +2210,7 @@ Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) {
Mask->getType() == Ty)
return BinaryOperator::CreateAnd(Builder.CreatePtrToInt(Ptr, Ty), Mask);
- if (Value *V = foldPtrToIntOfGEP(Ty, SrcOp))
+ if (Value *V = foldPtrToIntOrAddrOfGEP(Ty, SrcOp))
return replaceInstUsesWith(CI, V);
Value *Vec, *Scalar, *Index;
@@ -2228,6 +2228,21 @@ Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) {
}
Instruction *InstCombinerImpl::visitPtrToAddr(PtrToAddrInst &CI) {
+ Value *SrcOp = CI.getPointerOperand();
+ Type *Ty = CI.getType();
+
+ // (ptrtoaddr (ptrmask P, M))
+ // -> (and (ptrtoaddr P), M)
+ // This is generally beneficial as `and` is better supported than `ptrmask`.
+ Value *Ptr, *Mask;
+ if (match(SrcOp, m_OneUse(m_Intrinsic<Intrinsic::ptrmask>(m_Value(Ptr),
+ m_Value(Mask)))) &&
+ Mask->getType() == Ty)
+ return BinaryOperator::CreateAnd(Builder.CreatePtrToAddr(Ptr), Mask);
+
+ if (Value *V = foldPtrToIntOrAddrOfGEP(Ty, SrcOp))
+ return replaceInstUsesWith(CI, V);
+
// FIXME: Implement variants of ptrtoint folds.
return commonCastTransforms(CI);
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 9c75d9a..d85e4f7 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -700,7 +700,7 @@ public:
/// folded operation.
void PHIArgMergedDebugLoc(Instruction *Inst, PHINode &PN);
- Value *foldPtrToIntOfGEP(Type *IntTy, Value *Ptr);
+ Value *foldPtrToIntOrAddrOfGEP(Type *IntTy, Value *Ptr);
Instruction *foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, CmpPredicate Cond,
Instruction &I);
Instruction *foldSelectICmp(CmpPredicate Pred, SelectInst *SI, Value *RHS,
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index facb0fa..f7968ab 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7543,12 +7543,13 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
}
if (LoadInst *Load = dyn_cast<LoadInst>(I))
return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse,
- VPIRMetadata(*Load, LVer), I->getDebugLoc());
+ Load->getAlign(), VPIRMetadata(*Load, LVer),
+ I->getDebugLoc());
StoreInst *Store = cast<StoreInst>(I);
return new VPWidenStoreRecipe(*Store, Ptr, Operands[0], Mask, Consecutive,
- Reverse, VPIRMetadata(*Store, LVer),
- I->getDebugLoc());
+ Reverse, Store->getAlign(),
+ VPIRMetadata(*Store, LVer), I->getDebugLoc());
}
/// Creates a VPWidenIntOrFpInductionRecipe for \p Phi. If needed, it will also
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 5b9f005..1f10058 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -3179,6 +3179,9 @@ class LLVM_ABI_FOR_TEST VPWidenMemoryRecipe : public VPRecipeBase,
protected:
Instruction &Ingredient;
+ /// Alignment information for this memory access.
+ Align Alignment;
+
/// Whether the accessed addresses are consecutive.
bool Consecutive;
@@ -3198,10 +3201,10 @@ protected:
VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
std::initializer_list<VPValue *> Operands,
- bool Consecutive, bool Reverse,
+ bool Consecutive, bool Reverse, Align Alignment,
const VPIRMetadata &Metadata, DebugLoc DL)
: VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I),
- Consecutive(Consecutive), Reverse(Reverse) {
+ Alignment(Alignment), Consecutive(Consecutive), Reverse(Reverse) {
assert((Consecutive || !Reverse) && "Reverse implies consecutive");
}
@@ -3242,6 +3245,9 @@ public:
return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
}
+ /// Returns the alignment of the memory access.
+ Align getAlign() const { return Alignment; }
+
/// Generate the wide load/store.
void execute(VPTransformState &State) override {
llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
@@ -3259,18 +3265,18 @@ public:
struct LLVM_ABI_FOR_TEST VPWidenLoadRecipe final : public VPWidenMemoryRecipe,
public VPValue {
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
- bool Consecutive, bool Reverse,
+ bool Consecutive, bool Reverse, Align Alignment,
const VPIRMetadata &Metadata, DebugLoc DL)
: VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
- Reverse, Metadata, DL),
+ Reverse, Alignment, Metadata, DL),
VPValue(this, &Load) {
setMask(Mask);
}
VPWidenLoadRecipe *clone() override {
return new VPWidenLoadRecipe(cast<LoadInst>(Ingredient), getAddr(),
- getMask(), Consecutive, Reverse, *this,
- getDebugLoc());
+ getMask(), Consecutive, Reverse, getAlign(),
+ *this, getDebugLoc());
}
VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC);
@@ -3301,8 +3307,8 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue *Addr, VPValue &EVL,
VPValue *Mask)
: VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(),
- {Addr, &EVL}, L.isConsecutive(), L.isReverse(), L,
- L.getDebugLoc()),
+ {Addr, &EVL}, L.isConsecutive(), L.isReverse(),
+ L.getAlign(), L, L.getDebugLoc()),
VPValue(this, &getIngredient()) {
setMask(Mask);
}
@@ -3340,16 +3346,16 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
struct LLVM_ABI_FOR_TEST VPWidenStoreRecipe final : public VPWidenMemoryRecipe {
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
VPValue *Mask, bool Consecutive, bool Reverse,
- const VPIRMetadata &Metadata, DebugLoc DL)
+ Align Alignment, const VPIRMetadata &Metadata, DebugLoc DL)
: VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
- Consecutive, Reverse, Metadata, DL) {
+ Consecutive, Reverse, Alignment, Metadata, DL) {
setMask(Mask);
}
VPWidenStoreRecipe *clone() override {
return new VPWidenStoreRecipe(cast<StoreInst>(Ingredient), getAddr(),
getStoredValue(), getMask(), Consecutive,
- Reverse, *this, getDebugLoc());
+ Reverse, getAlign(), *this, getDebugLoc());
}
VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
@@ -3384,7 +3390,7 @@ struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe {
VPValue *Mask)
: VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
{Addr, S.getStoredValue(), &EVL}, S.isConsecutive(),
- S.isReverse(), S, S.getDebugLoc()) {
+ S.isReverse(), S.getAlign(), S, S.getDebugLoc()) {
setMask(Mask);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 931a5b7..9a63c80 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -70,6 +70,7 @@ bool VPRecipeBase::mayWriteToMemory() const {
return cast<VPWidenIntrinsicRecipe>(this)->mayWriteToMemory();
case VPCanonicalIVPHISC:
case VPBranchOnMaskSC:
+ case VPDerivedIVSC:
case VPFirstOrderRecurrencePHISC:
case VPReductionPHISC:
case VPScalarIVStepsSC:
@@ -86,6 +87,7 @@ bool VPRecipeBase::mayWriteToMemory() const {
case VPWidenLoadEVLSC:
case VPWidenLoadSC:
case VPWidenPHISC:
+ case VPWidenPointerInductionSC:
case VPWidenSC:
case VPWidenSelectSC: {
const Instruction *I =
@@ -119,6 +121,7 @@ bool VPRecipeBase::mayReadFromMemory() const {
case VPWidenIntrinsicSC:
return cast<VPWidenIntrinsicRecipe>(this)->mayReadFromMemory();
case VPBranchOnMaskSC:
+ case VPDerivedIVSC:
case VPFirstOrderRecurrencePHISC:
case VPPredInstPHISC:
case VPScalarIVStepsSC:
@@ -134,6 +137,7 @@ bool VPRecipeBase::mayReadFromMemory() const {
case VPWidenGEPSC:
case VPWidenIntOrFpInductionSC:
case VPWidenPHISC:
+ case VPWidenPointerInductionSC:
case VPWidenSC:
case VPWidenSelectSC: {
const Instruction *I =
@@ -3358,7 +3362,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
Type *ValTy = Ctx.Types.inferScalarType(IsLoad ? this : getOperand(0));
Type *ScalarPtrTy = Ctx.Types.inferScalarType(PtrOp);
const Align Alignment = getLoadStoreAlignment(UI);
- unsigned AS = getLoadStoreAddressSpace(UI);
+ unsigned AS = cast<PointerType>(ScalarPtrTy)->getAddressSpace();
TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(UI->getOperand(0));
InstructionCost ScalarMemOpCost = Ctx.TTI.getMemoryOpCost(
UI->getOpcode(), ValTy, Alignment, AS, Ctx.CostKind, OpInfo);
@@ -3525,7 +3529,6 @@ void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF,
VPCostContext &Ctx) const {
Type *Ty = toVectorTy(getLoadStoreType(&Ingredient), VF);
- const Align Alignment = getLoadStoreAlignment(&Ingredient);
unsigned AS = cast<PointerType>(Ctx.Types.inferScalarType(getAddr()))
->getAddressSpace();
unsigned Opcode = isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe>(this)
@@ -3575,7 +3578,6 @@ InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF,
void VPWidenLoadRecipe::execute(VPTransformState &State) {
Type *ScalarDataTy = getLoadStoreType(&Ingredient);
auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
- const Align Alignment = getLoadStoreAlignment(&Ingredient);
bool CreateGather = !isConsecutive();
auto &Builder = State.Builder;
@@ -3630,7 +3632,6 @@ static Instruction *createReverseEVL(IRBuilderBase &Builder, Value *Operand,
void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
Type *ScalarDataTy = getLoadStoreType(&Ingredient);
auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
- const Align Alignment = getLoadStoreAlignment(&Ingredient);
bool CreateGather = !isConsecutive();
auto &Builder = State.Builder;
@@ -3674,8 +3675,8 @@ InstructionCost VPWidenLoadEVLRecipe::computeCost(ElementCount VF,
// TODO: Using getMemoryOpCost() instead of getMaskedMemoryOpCost when we
// don't need to compare to the legacy cost model.
Type *Ty = toVectorTy(getLoadStoreType(&Ingredient), VF);
- const Align Alignment = getLoadStoreAlignment(&Ingredient);
- unsigned AS = getLoadStoreAddressSpace(&Ingredient);
+ unsigned AS = cast<PointerType>(Ctx.Types.inferScalarType(getAddr()))
+ ->getAddressSpace();
InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost(
Instruction::Load, Ty, Alignment, AS, Ctx.CostKind);
if (!Reverse)
@@ -3699,7 +3700,6 @@ void VPWidenLoadEVLRecipe::print(raw_ostream &O, const Twine &Indent,
void VPWidenStoreRecipe::execute(VPTransformState &State) {
VPValue *StoredVPValue = getStoredValue();
bool CreateScatter = !isConsecutive();
- const Align Alignment = getLoadStoreAlignment(&Ingredient);
auto &Builder = State.Builder;
@@ -3742,7 +3742,6 @@ void VPWidenStoreRecipe::print(raw_ostream &O, const Twine &Indent,
void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
VPValue *StoredValue = getStoredValue();
bool CreateScatter = !isConsecutive();
- const Align Alignment = getLoadStoreAlignment(&Ingredient);
auto &Builder = State.Builder;
@@ -3785,8 +3784,8 @@ InstructionCost VPWidenStoreEVLRecipe::computeCost(ElementCount VF,
// TODO: Using getMemoryOpCost() instead of getMaskedMemoryOpCost when we
// don't need to compare to the legacy cost model.
Type *Ty = toVectorTy(getLoadStoreType(&Ingredient), VF);
- const Align Alignment = getLoadStoreAlignment(&Ingredient);
- unsigned AS = getLoadStoreAddressSpace(&Ingredient);
+ unsigned AS = cast<PointerType>(Ctx.Types.inferScalarType(getAddr()))
+ ->getAddressSpace();
InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost(
Instruction::Store, Ty, Alignment, AS, Ctx.CostKind);
if (!Reverse)
@@ -4252,7 +4251,8 @@ InstructionCost VPInterleaveBase::computeCost(ElementCount VF,
getNumDefinedValues() > 0 ? getVPValue(InsertPosIdx)
: getStoredValues()[InsertPosIdx]);
auto *VectorTy = cast<VectorType>(toVectorTy(ValTy, VF));
- unsigned AS = getLoadStoreAddressSpace(InsertPos);
+ unsigned AS = cast<PointerType>(Ctx.Types.inferScalarType(getAddr()))
+ ->getAddressSpace();
unsigned InterleaveFactor = IG->getFactor();
auto *WideVecTy = VectorType::get(ValTy, VF * InterleaveFactor);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 84817d7..d9ac26bb 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -91,13 +91,14 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
NewRecipe = new VPWidenLoadRecipe(
*Load, Ingredient.getOperand(0), nullptr /*Mask*/,
- false /*Consecutive*/, false /*Reverse*/, VPIRMetadata(*Load),
- Ingredient.getDebugLoc());
+ false /*Consecutive*/, false /*Reverse*/, Load->getAlign(),
+ VPIRMetadata(*Load), Ingredient.getDebugLoc());
} else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
NewRecipe = new VPWidenStoreRecipe(
*Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/,
- VPIRMetadata(*Store), Ingredient.getDebugLoc());
+ Store->getAlign(), VPIRMetadata(*Store),
+ Ingredient.getDebugLoc());
} else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
NewRecipe = new VPWidenGEPRecipe(GEP, Ingredient.operands());
} else if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
@@ -130,6 +131,24 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
return true;
}
+/// Return true if we do not know how to (mechanically) hoist or sink \p R out
+/// of a loop region.
+static bool cannotHoistOrSinkRecipe(const VPRecipeBase &R) {
+ // Assumes don't alias anything or throw; as long as they're guaranteed to
+ // execute, they're safe to hoist.
+ if (match(&R, m_Intrinsic<Intrinsic::assume>()))
+ return false;
+
+ // TODO: Relax checks in the future, e.g. we could also hoist reads, if their
+ // memory location is not modified in the vector loop.
+ if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi())
+ return true;
+
+ // Allocas cannot be hoisted.
+ auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
+ return RepR && RepR->getOpcode() == Instruction::Alloca;
+}
+
static bool sinkScalarOperands(VPlan &Plan) {
auto Iter = vp_depth_first_deep(Plan.getEntry());
bool Changed = false;
@@ -1825,7 +1844,7 @@ sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
VPDT.properlyDominates(Previous, SinkCandidate))
return true;
- if (SinkCandidate->mayHaveSideEffects())
+ if (cannotHoistOrSinkRecipe(*SinkCandidate))
return false;
WorkList.push_back(SinkCandidate);
@@ -1865,7 +1884,7 @@ sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
static bool hoistPreviousBeforeFORUsers(VPFirstOrderRecurrencePHIRecipe *FOR,
VPRecipeBase *Previous,
VPDominatorTree &VPDT) {
- if (Previous->mayHaveSideEffects() || Previous->mayReadFromMemory())
+ if (cannotHoistOrSinkRecipe(*Previous))
return false;
// Collect recipes that need hoisting.
@@ -1912,11 +1931,6 @@ static bool hoistPreviousBeforeFORUsers(VPFirstOrderRecurrencePHIRecipe *FOR,
return nullptr;
return HoistCandidate;
};
- auto CanHoist = [&](VPRecipeBase *HoistCandidate) {
- // Avoid hoisting candidates with side-effects, as we do not yet analyze
- // associated dependencies.
- return !HoistCandidate->mayHaveSideEffects();
- };
if (!NeedsHoisting(Previous->getVPSingleValue()))
return true;
@@ -1928,7 +1942,7 @@ static bool hoistPreviousBeforeFORUsers(VPFirstOrderRecurrencePHIRecipe *FOR,
VPRecipeBase *Current = HoistCandidates[I];
assert(Current->getNumDefinedValues() == 1 &&
"only recipes with a single defined value expected");
- if (!CanHoist(Current))
+ if (cannotHoistOrSinkRecipe(*Current))
return false;
for (VPValue *Op : Current->operands()) {
@@ -2143,24 +2157,6 @@ void VPlanTransforms::cse(VPlan &Plan) {
static void licm(VPlan &Plan) {
VPBasicBlock *Preheader = Plan.getVectorPreheader();
- // Return true if we do not know how to (mechanically) hoist a given recipe
- // out of a loop region.
- auto CannotHoistRecipe = [](VPRecipeBase &R) {
- // Assumes don't alias anything or throw; as long as they're guaranteed to
- // execute, they're safe to hoist.
- if (match(&R, m_Intrinsic<Intrinsic::assume>()))
- return false;
-
- // TODO: Relax checks in the future, e.g. we could also hoist reads, if
- // their memory location is not modified in the vector loop.
- if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi())
- return true;
-
- // Allocas cannot be hoisted.
- auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
- return RepR && RepR->getOpcode() == Instruction::Alloca;
- };
-
// Hoist any loop invariant recipes from the vector loop region to the
// preheader. Perform a shallow traversal of the vector loop region, to
// exclude recipes in replicate regions. Since the top-level blocks in the
@@ -2172,7 +2168,7 @@ static void licm(VPlan &Plan) {
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_shallow(LoopRegion->getEntry()))) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
- if (CannotHoistRecipe(R))
+ if (cannotHoistOrSinkRecipe(R))
continue;
if (any_of(R.operands(), [](VPValue *Op) {
return !Op->isDefinedOutsideLoopRegions();
@@ -3652,6 +3648,37 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
Sub = VecOp->getDefiningRecipe();
VecOp = Tmp;
}
+
+ // If ValB is a constant and can be safely extended, truncate it to the same
+ // type as ExtA's operand, then extend it to the same type as ExtA. This
+ // creates two uniform extends that can more easily be matched by the rest of
+ // the bundling code. The ExtB reference, ValB and operand 1 of Mul are all
+ // replaced with the new extend of the constant.
+ auto ExtendAndReplaceConstantOp = [&Ctx](VPWidenCastRecipe *ExtA,
+ VPWidenCastRecipe *&ExtB,
+ VPValue *&ValB, VPWidenRecipe *Mul) {
+ if (!ExtA || ExtB || !ValB->isLiveIn())
+ return;
+ Type *NarrowTy = Ctx.Types.inferScalarType(ExtA->getOperand(0));
+ Instruction::CastOps ExtOpc = ExtA->getOpcode();
+ const APInt *Const;
+ if (!match(ValB, m_APInt(Const)) ||
+ !llvm::canConstantBeExtended(
+ Const, NarrowTy, TTI::getPartialReductionExtendKind(ExtOpc)))
+ return;
+ // The truncate ensures that the type of each extended operand is the
+ // same, and it's been proven that the constant can be extended from
+ // NarrowTy safely. Necessary since ExtA's extended operand would be
+ // e.g. an i8, while the const will likely be an i32. This will be
+ // elided by later optimisations.
+ VPBuilder Builder(Mul);
+ auto *Trunc =
+ Builder.createWidenCast(Instruction::CastOps::Trunc, ValB, NarrowTy);
+ Type *WideTy = Ctx.Types.inferScalarType(ExtA);
+ ValB = ExtB = Builder.createWidenCast(ExtOpc, Trunc, WideTy);
+ Mul->setOperand(1, ExtB);
+ };
+
// Try to match reduce.add(mul(...)).
if (match(VecOp, m_Mul(m_VPValue(A), m_VPValue(B)))) {
auto *RecipeA =
@@ -3660,6 +3687,9 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
dyn_cast_if_present<VPWidenCastRecipe>(B->getDefiningRecipe());
auto *Mul = cast<VPWidenRecipe>(VecOp->getDefiningRecipe());
+ // Convert reduce.add(mul(ext, const)) to reduce.add(mul(ext, ext(const)))
+ ExtendAndReplaceConstantOp(RecipeA, RecipeB, B, Mul);
+
// Match reduce.add/sub(mul(ext, ext)).
if (RecipeA && RecipeB && match(RecipeA, m_ZExtOrSExt(m_VPValue())) &&
match(RecipeB, m_ZExtOrSExt(m_VPValue())) &&
@@ -3669,7 +3699,6 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
cast<VPWidenRecipe>(Sub), Red);
return new VPExpressionRecipe(RecipeA, RecipeB, Mul, Red);
}
- // Match reduce.add(mul).
// TODO: Add an expression type for this variant with a negated mul
if (!Sub && IsMulAccValidAndClampRange(Mul, nullptr, nullptr, nullptr))
return new VPExpressionRecipe(Mul, Red);
@@ -3678,18 +3707,26 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
// variants.
if (Sub)
return nullptr;
- // Match reduce.add(ext(mul(ext(A), ext(B)))).
- // All extend recipes must have same opcode or A == B
- // which can be transform to reduce.add(zext(mul(sext(A), sext(B)))).
- if (match(VecOp, m_ZExtOrSExt(m_Mul(m_ZExtOrSExt(m_VPValue()),
- m_ZExtOrSExt(m_VPValue()))))) {
+
+ // Match reduce.add(ext(mul(A, B))).
+ if (match(VecOp, m_ZExtOrSExt(m_Mul(m_VPValue(A), m_VPValue(B))))) {
auto *Ext = cast<VPWidenCastRecipe>(VecOp->getDefiningRecipe());
auto *Mul = cast<VPWidenRecipe>(Ext->getOperand(0)->getDefiningRecipe());
- auto *Ext0 =
- cast<VPWidenCastRecipe>(Mul->getOperand(0)->getDefiningRecipe());
- auto *Ext1 =
- cast<VPWidenCastRecipe>(Mul->getOperand(1)->getDefiningRecipe());
- if ((Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&
+ auto *Ext0 = dyn_cast_if_present<VPWidenCastRecipe>(A->getDefiningRecipe());
+ auto *Ext1 = dyn_cast_if_present<VPWidenCastRecipe>(B->getDefiningRecipe());
+
+ // reduce.add(ext(mul(ext, const)))
+ // -> reduce.add(ext(mul(ext, ext(const))))
+ ExtendAndReplaceConstantOp(Ext0, Ext1, B, Mul);
+
+ // reduce.add(ext(mul(ext(A), ext(B))))
+ // -> reduce.add(mul(wider_ext(A), wider_ext(B)))
+ // The inner extends must either have the same opcode as the outer extend or
+ // be the same, in which case the multiply can never result in a negative
+ // value and the outer extend can be folded away by doing wider
+ // extends for the operands of the mul.
+ if (Ext0 && Ext1 &&
+ (Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&
Ext0->getOpcode() == Ext1->getOpcode() &&
IsMulAccValidAndClampRange(Mul, Ext0, Ext1, Ext) && Mul->hasOneUse()) {
auto *NewExt0 = new VPWidenCastRecipe(
@@ -4234,10 +4271,11 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
if (auto *LoadGroup = dyn_cast<VPInterleaveRecipe>(R)) {
// Narrow interleave group to wide load, as transformed VPlan will only
// process one original iteration.
+ auto *LI =
+ cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos());
auto *L = new VPWidenLoadRecipe(
- *cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos()),
- LoadGroup->getAddr(), LoadGroup->getMask(), /*Consecutive=*/true,
- /*Reverse=*/false, {}, LoadGroup->getDebugLoc());
+ *LI, LoadGroup->getAddr(), LoadGroup->getMask(), /*Consecutive=*/true,
+ /*Reverse=*/false, LI->getAlign(), {}, LoadGroup->getDebugLoc());
L->insertBefore(LoadGroup);
NarrowedOps.insert(L);
return L;
@@ -4280,10 +4318,11 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
Res = NarrowOp(Member0);
}
+ auto *SI =
+ cast<StoreInst>(StoreGroup->getInterleaveGroup()->getInsertPos());
auto *S = new VPWidenStoreRecipe(
- *cast<StoreInst>(StoreGroup->getInterleaveGroup()->getInsertPos()),
- StoreGroup->getAddr(), Res, nullptr, /*Consecutive=*/true,
- /*Reverse=*/false, {}, StoreGroup->getDebugLoc());
+ *SI, StoreGroup->getAddr(), Res, nullptr, /*Consecutive=*/true,
+ /*Reverse=*/false, SI->getAlign(), {}, StoreGroup->getDebugLoc());
S->insertBefore(StoreGroup);
StoreGroup->eraseFromParent();
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir
index 68302f5..5f98dae 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir
@@ -290,11 +290,8 @@ body: |
; CHECK-LABEL: name: s3_from_s35
; CHECK: liveins: $w0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s64)
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
- ; CHECK-NEXT: %ext:_(s32) = G_AND [[TRUNC]], [[C]]
- ; CHECK-NEXT: $w0 = COPY %ext(s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: $w0 = COPY [[C]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%val:_(s35) = G_IMPLICIT_DEF
%extract:_(s3) = G_EXTRACT %val, 0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-freeze.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-freeze.mir
index 03c28ef..b28298c 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-freeze.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-freeze.mir
@@ -159,13 +159,16 @@ body: |
; CHECK-LABEL: name: test_freeze_v3s8
; CHECK: liveins: $q0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s8>) = G_FREEZE [[DEF]]
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[FREEZE]](<4 x s8>)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[BUILD_VECTOR]](<8 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s8>), [[UV1:%[0-9]+]]:_(<4 x s8>) = G_UNMERGE_VALUES [[TRUNC]](<8 x s8>)
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s8>) = G_FREEZE [[UV]]
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[FREEZE]](<4 x s8>)
; CHECK-NEXT: %undef:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: %ext0:_(s32) = G_ZEXT [[UV]](s8)
- ; CHECK-NEXT: %ext1:_(s32) = G_ZEXT [[UV1]](s8)
- ; CHECK-NEXT: %ext2:_(s32) = G_ZEXT [[UV2]](s8)
+ ; CHECK-NEXT: %ext0:_(s32) = G_ZEXT [[UV2]](s8)
+ ; CHECK-NEXT: %ext1:_(s32) = G_ZEXT [[UV3]](s8)
+ ; CHECK-NEXT: %ext2:_(s32) = G_ZEXT [[UV4]](s8)
; CHECK-NEXT: %res:_(<4 x s32>) = G_BUILD_VECTOR %ext0(s32), %ext1(s32), %ext2(s32), %undef(s32)
; CHECK-NEXT: $q0 = COPY %res(<4 x s32>)
%x:_(<3 x s8>) = G_IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-insert-vector-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-insert-vector-elt.mir
index 858a5a2..1cf066d 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-insert-vector-elt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-insert-vector-elt.mir
@@ -248,21 +248,19 @@ body: |
; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s16)
; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s16)
; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[UV4]](s16)
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF2]](<4 x s8>)
- ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8), [[UV6]](s8), [[UV7]](s8), [[UV8]](s8), [[UV6]](s8), [[UV7]](s8), [[UV8]](s8), [[UV6]](s8), [[UV7]](s8), [[UV8]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[C]](s8), [[DEF]](s8), [[DEF]](s8), [[UV6]](s8), [[UV7]](s8), [[UV8]](s8), [[UV6]](s8), [[UV7]](s8), [[UV8]](s8), [[UV6]](s8), [[UV7]](s8), [[UV8]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[C]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<16 x s8>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR1]](<16 x s8>), [[BUILD_VECTOR2]], shufflemask(0, 16, 16, 16, 1, 16, 16, 16, 2, 16, 16, 16, undef, undef, undef, undef)
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[SHUF]](<16 x s8>)
; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(<4 x s32>) = G_UITOFP [[BITCAST]](<4 x s32>)
- ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UITOFP]](<4 x s32>)
- ; CHECK-NEXT: G_STORE [[UV10]](s32), [[COPY]](p0) :: (store (s32), align 16)
+ ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UITOFP]](<4 x s32>)
+ ; CHECK-NEXT: G_STORE [[UV6]](s32), [[COPY]](p0) :: (store (s32), align 16)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64)
- ; CHECK-NEXT: G_STORE [[UV11]](s32), [[PTR_ADD]](p0) :: (store (s32) into unknown-address + 4)
+ ; CHECK-NEXT: G_STORE [[UV7]](s32), [[PTR_ADD]](p0) :: (store (s32) into unknown-address + 4)
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C4]](s64)
- ; CHECK-NEXT: G_STORE [[UV12]](s32), [[PTR_ADD1]](p0) :: (store (s32) into unknown-address + 8, align 8)
+ ; CHECK-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p0) :: (store (s32) into unknown-address + 8, align 8)
; CHECK-NEXT: G_BR %bb.1
bb.1:
liveins: $w1, $w2, $w3, $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
index 2c326902..eb30581 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
@@ -545,15 +545,18 @@ body: |
; CHECK-LABEL: name: store_6xs64
; CHECK: liveins: $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[DEF]](s64), [[DEF]](s64)
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[DEF]](s64), [[DEF]](s64)
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[DEF]](s64), [[DEF]](s64)
; CHECK-NEXT: %ptr:_(p0) = COPY $x0
- ; CHECK-NEXT: G_STORE [[DEF]](<2 x s64>), %ptr(p0) :: (store (<2 x s64>))
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), %ptr(p0) :: (store (<2 x s64>))
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C]](s64)
- ; CHECK-NEXT: G_STORE [[DEF]](<2 x s64>), [[PTR_ADD]](p0) :: (store (<2 x s64>) into unknown-address + 16)
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s64>), [[PTR_ADD]](p0) :: (store (<2 x s64>) into unknown-address + 16)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C1]](s64)
- ; CHECK-NEXT: G_STORE [[DEF]](<2 x s64>), [[PTR_ADD1]](p0) :: (store (<2 x s64>) into unknown-address + 32)
+ ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<2 x s64>), [[PTR_ADD1]](p0) :: (store (<2 x s64>) into unknown-address + 32)
; CHECK-NEXT: RET_ReallyLR
%val:_(<6 x s64>) = G_IMPLICIT_DEF
%ptr:_(p0) = COPY $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir
index b8bdef0..737c66c 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir
@@ -220,10 +220,8 @@ body: |
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UADDE]](s32), [[SEXT_INREG2]]
; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UADDE]](s32)
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF1]](s32)
; CHECK-NEXT: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8), [[UV6]](s8), [[DEF]](s8)
- ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV8]](s8), [[UV9]](s8), [[UV10]](s8), [[UV8]](s8)
+ ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32)
; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s64) = G_SEXT_INREG [[MV2]], 24
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 23
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir
index 52a28ad..1c5ae0d 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir
@@ -289,35 +289,35 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4100
; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), %w0(s32), [[C]]
; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ICMP2]], 1
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32)
- ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s16>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C1]](s64)
+ ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s16>) = G_INSERT_VECTOR_ELT [[DEF1]], [[TRUNC]](s16), [[C1]](s64)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[IVEC]](<4 x s16>)
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s16)
; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s16)
; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s16)
; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s16)
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
- ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[UV4]](s16)
- ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[UV5]](s16)
- ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[UV6]](s16)
- ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[UV7]](s16)
- ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8)
+ ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[DEF]](s16)
+ ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[DEF]](s16)
+ ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[DEF]](s16)
+ ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[DEF]](s16)
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8)
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<8 x s8>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR]](<8 x s8>), [[BUILD_VECTOR1]], shufflemask(0, 0, 0, 0, undef, undef, undef, undef)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[SHUF]](<8 x s8>)
- ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s16>), [[UV9:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT]](<8 x s16>)
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
+ ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<4 x s16>), [[UV5:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT]](<8 x s16>)
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8)
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR2]](<8 x s8>)
- ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<4 x s16>), [[UV11:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT1]](<8 x s16>)
- ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[UV8]], [[UV10]]
+ ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<4 x s16>), [[UV7:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT1]](<8 x s16>)
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[UV4]], [[UV6]]
; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>)
; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[SHUF]](<8 x s8>)
- ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<4 x s16>), [[UV13:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT2]](<8 x s16>)
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC9]], [[UV12]]
+ ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s16>), [[UV9:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT2]](<8 x s16>)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC9]], [[UV8]]
; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>)
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC10]], [[XOR]]
; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[AND]], [[AND1]]
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
index fdd0ebb..352f4e7 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
@@ -288,10 +288,9 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[UV]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[UV]](s32)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[DEF]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[DEF]](s32)
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR]](<4 x s32>), [[BUILD_VECTOR1]], shufflemask(0, 1, 5, 6)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[SHUF]](<4 x s32>), [[C]](s64)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir
index 2311be6..abfaea0 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir
@@ -220,10 +220,8 @@ body: |
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[USUBE]](s32), [[SEXT_INREG2]]
; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[USUBE]](s32)
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF1]](s32)
; CHECK-NEXT: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8), [[UV6]](s8), [[DEF]](s8)
- ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV8]](s8), [[UV9]](s8), [[UV10]](s8), [[UV8]](s8)
+ ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32)
; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s64) = G_SEXT_INREG [[MV2]], 24
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 23
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-undef.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-undef.mir
index 2609eb0..9726cc5 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-undef.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-undef.mir
@@ -37,10 +37,9 @@ body: |
bb.0:
; CHECK-LABEL: name: test_implicit_def_v4s32
- ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
- ; CHECK-NEXT: $x0 = COPY [[UV]](<2 x s32>)
- ; CHECK-NEXT: $x1 = COPY [[UV1]](<2 x s32>)
+ ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: $x0 = COPY [[DEF]](<2 x s32>)
+ ; CHECK-NEXT: $x1 = COPY [[DEF]](<2 x s32>)
%0:_(<4 x s32>) = G_IMPLICIT_DEF
%1:_(<2 x s32> ), %2:_(<2 x s32>) = G_UNMERGE_VALUES %0
$x0 = COPY %1
@@ -67,10 +66,9 @@ body: |
bb.0:
; CHECK-LABEL: name: test_implicit_def_v2s32
- ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
- ; CHECK-NEXT: $w0 = COPY [[UV]](s32)
- ; CHECK-NEXT: $w1 = COPY [[UV1]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: $w0 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $w1 = COPY [[DEF]](s32)
%0:_(<2 x s32>) = G_IMPLICIT_DEF
%1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0
$w0 = COPY %1
diff --git a/llvm/test/CodeGen/AArch64/dup.ll b/llvm/test/CodeGen/AArch64/dup.ll
index 670574f2..6df6d76 100644
--- a/llvm/test/CodeGen/AArch64/dup.ll
+++ b/llvm/test/CodeGen/AArch64/dup.ll
@@ -2,16 +2,21 @@
; RUN: llc -mtriple=aarch64-none-none-eabi -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=aarch64-none-none-eabi -verify-machineinstrs -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-; CHECK-GI: warning: Instruction selection used fallback path for dup_v2i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for duplane0_v2i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_v2i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_str_v2i8
-
define <2 x i8> @dup_v2i8(i8 %a) {
-; CHECK-LABEL: dup_v2i8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: dup v0.2s, w0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: dup_v2i8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: dup v0.2s, w0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: dup_v2i8:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v0.8b, w0
+; CHECK-GI-NEXT: umov w8, v0.b[0]
+; CHECK-GI-NEXT: umov w9, v0.b[1]
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: mov v0.s[1], w9
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
entry:
%b = insertelement <2 x i8> poison, i8 %a, i64 0
%c = shufflevector <2 x i8> %b, <2 x i8> poison, <2 x i32> zeroinitializer
@@ -19,22 +24,45 @@ entry:
}
define <2 x i8> @duplane0_v2i8(<2 x i8> %b) {
-; CHECK-LABEL: duplane0_v2i8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: dup v0.2s, v0.s[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: duplane0_v2i8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: dup v0.2s, v0.s[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: duplane0_v2i8:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: mov w8, v0.s[1]
+; CHECK-GI-NEXT: mov v0.b[1], w8
+; CHECK-GI-NEXT: dup v0.8b, v0.b[0]
+; CHECK-GI-NEXT: umov w8, v0.b[0]
+; CHECK-GI-NEXT: umov w9, v0.b[1]
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: mov v0.s[1], w9
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
entry:
%c = shufflevector <2 x i8> %b, <2 x i8> poison, <2 x i32> zeroinitializer
ret <2 x i8> %c
}
define <2 x i8> @loaddup_v2i8(ptr %p) {
-; CHECK-LABEL: loaddup_v2i8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldr b0, [x0]
-; CHECK-NEXT: dup v0.2s, v0.s[0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: loaddup_v2i8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ldr b0, [x0]
+; CHECK-SD-NEXT: dup v0.2s, v0.s[0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: loaddup_v2i8:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ld1r { v0.8b }, [x0]
+; CHECK-GI-NEXT: umov w8, v0.b[0]
+; CHECK-GI-NEXT: umov w9, v0.b[1]
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: mov v0.s[1], w9
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
entry:
%a = load i8, ptr %p
%b = insertelement <2 x i8> poison, i8 %a, i64 0
@@ -43,12 +71,24 @@ entry:
}
define <2 x i8> @loaddup_str_v2i8(ptr %p) {
-; CHECK-LABEL: loaddup_str_v2i8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldrb w8, [x0]
-; CHECK-NEXT: strb wzr, [x0]
-; CHECK-NEXT: dup v0.2s, w8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: loaddup_str_v2i8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ldrb w8, [x0]
+; CHECK-SD-NEXT: strb wzr, [x0]
+; CHECK-SD-NEXT: dup v0.2s, w8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: loaddup_str_v2i8:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr b0, [x0]
+; CHECK-GI-NEXT: strb wzr, [x0]
+; CHECK-GI-NEXT: dup v0.8b, v0.b[0]
+; CHECK-GI-NEXT: umov w8, v0.b[0]
+; CHECK-GI-NEXT: umov w9, v0.b[1]
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: mov v0.s[1], w9
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
entry:
%a = load i8, ptr %p
%b = insertelement <2 x i8> poison, i8 %a, i64 0
diff --git a/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll b/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll
index e4f9efa..0504959 100644
--- a/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll
+++ b/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll
@@ -351,7 +351,6 @@ define i64 @test_many_callee_arguments(
ret i64 %ret
}
-; FIXME: The new lowering should avoid saves/restores in the probing loop.
define void @agnostic_za_buffer_alloc_with_stack_probes() nounwind "aarch64_za_state_agnostic" "probe-stack"="inline-asm" "stack-probe-size"="65536"{
; CHECK-LABEL: agnostic_za_buffer_alloc_with_stack_probes:
; CHECK: // %bb.0:
@@ -389,16 +388,14 @@ define void @agnostic_za_buffer_alloc_with_stack_probes() nounwind "aarch64_za_s
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_state_size
; CHECK-NEWLOWERING-NEXT: mov x8, sp
; CHECK-NEWLOWERING-NEXT: sub x19, x8, x0
-; CHECK-NEWLOWERING-NEXT: .LBB7_1: // =>This Inner Loop Header: Depth=1
-; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEWLOWERING-NEXT: mov x0, x19
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_save
+; CHECK-NEWLOWERING-NEXT: .LBB7_1: // =>This Inner Loop Header: Depth=1
+; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEWLOWERING-NEXT: cmp sp, x19
; CHECK-NEWLOWERING-NEXT: b.le .LBB7_3
; CHECK-NEWLOWERING-NEXT: // %bb.2: // in Loop: Header=BB7_1 Depth=1
-; CHECK-NEWLOWERING-NEXT: mov x0, x19
; CHECK-NEWLOWERING-NEXT: str xzr, [sp]
-; CHECK-NEWLOWERING-NEXT: bl __arm_sme_restore
; CHECK-NEWLOWERING-NEXT: b .LBB7_1
; CHECK-NEWLOWERING-NEXT: .LBB7_3:
; CHECK-NEWLOWERING-NEXT: mov sp, x19
diff --git a/llvm/test/CodeGen/AArch64/sme-za-control-flow.ll b/llvm/test/CodeGen/AArch64/sme-za-control-flow.ll
index 18ea07e..c753e9c 100644
--- a/llvm/test/CodeGen/AArch64/sme-za-control-flow.ll
+++ b/llvm/test/CodeGen/AArch64/sme-za-control-flow.ll
@@ -228,65 +228,34 @@ exit:
ret void
}
-; FIXME: The codegen for this case could be improved (by tuning weights).
-; Here the ZA save has been hoisted out of the conditional, but would be better
-; to sink it.
define void @cond_private_za_call(i1 %cond) "aarch64_inout_za" nounwind {
-; CHECK-LABEL: cond_private_za_call:
-; CHECK: // %bb.0:
-; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT: mov x29, sp
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: rdsvl x8, #1
-; CHECK-NEXT: mov x9, sp
-; CHECK-NEXT: msub x9, x8, x8, x9
-; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: stp x9, x8, [x29, #-16]
-; CHECK-NEXT: tbz w0, #0, .LBB3_4
-; CHECK-NEXT: // %bb.1: // %private_za_call
-; CHECK-NEXT: sub x8, x29, #16
-; CHECK-NEXT: msr TPIDR2_EL0, x8
-; CHECK-NEXT: bl private_za_call
-; CHECK-NEXT: smstart za
-; CHECK-NEXT: mrs x8, TPIDR2_EL0
-; CHECK-NEXT: sub x0, x29, #16
-; CHECK-NEXT: cbnz x8, .LBB3_3
-; CHECK-NEXT: // %bb.2: // %private_za_call
-; CHECK-NEXT: bl __arm_tpidr2_restore
-; CHECK-NEXT: .LBB3_3: // %private_za_call
-; CHECK-NEXT: msr TPIDR2_EL0, xzr
-; CHECK-NEXT: .LBB3_4: // %exit
-; CHECK-NEXT: mov sp, x29
-; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
-; CHECK-NEXT: b shared_za_call
-;
-; CHECK-NEWLOWERING-LABEL: cond_private_za_call:
-; CHECK-NEWLOWERING: // %bb.0:
-; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEWLOWERING-NEXT: mov x29, sp
-; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16
-; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1
-; CHECK-NEWLOWERING-NEXT: mov x9, sp
-; CHECK-NEWLOWERING-NEXT: msub x9, x8, x8, x9
-; CHECK-NEWLOWERING-NEXT: mov sp, x9
-; CHECK-NEWLOWERING-NEXT: sub x10, x29, #16
-; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-16]
-; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x10
-; CHECK-NEWLOWERING-NEXT: tbz w0, #0, .LBB3_2
-; CHECK-NEWLOWERING-NEXT: // %bb.1: // %private_za_call
-; CHECK-NEWLOWERING-NEXT: bl private_za_call
-; CHECK-NEWLOWERING-NEXT: .LBB3_2: // %exit
-; CHECK-NEWLOWERING-NEXT: smstart za
-; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
-; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16
-; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB3_4
-; CHECK-NEWLOWERING-NEXT: // %bb.3: // %exit
-; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
-; CHECK-NEWLOWERING-NEXT: .LBB3_4: // %exit
-; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
-; CHECK-NEWLOWERING-NEXT: mov sp, x29
-; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
-; CHECK-NEWLOWERING-NEXT: b shared_za_call
+; CHECK-COMMON-LABEL: cond_private_za_call:
+; CHECK-COMMON: // %bb.0:
+; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: mov x29, sp
+; CHECK-COMMON-NEXT: sub sp, sp, #16
+; CHECK-COMMON-NEXT: rdsvl x8, #1
+; CHECK-COMMON-NEXT: mov x9, sp
+; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
+; CHECK-COMMON-NEXT: mov sp, x9
+; CHECK-COMMON-NEXT: stp x9, x8, [x29, #-16]
+; CHECK-COMMON-NEXT: tbz w0, #0, .LBB3_4
+; CHECK-COMMON-NEXT: // %bb.1: // %private_za_call
+; CHECK-COMMON-NEXT: sub x8, x29, #16
+; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x8
+; CHECK-COMMON-NEXT: bl private_za_call
+; CHECK-COMMON-NEXT: smstart za
+; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
+; CHECK-COMMON-NEXT: sub x0, x29, #16
+; CHECK-COMMON-NEXT: cbnz x8, .LBB3_3
+; CHECK-COMMON-NEXT: // %bb.2: // %private_za_call
+; CHECK-COMMON-NEXT: bl __arm_tpidr2_restore
+; CHECK-COMMON-NEXT: .LBB3_3: // %private_za_call
+; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
+; CHECK-COMMON-NEXT: .LBB3_4: // %exit
+; CHECK-COMMON-NEXT: mov sp, x29
+; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: b shared_za_call
br i1 %cond, label %private_za_call, label %exit
private_za_call:
@@ -910,7 +879,7 @@ define void @loop_with_external_entry(i1 %c1, i1 %c2) "aarch64_inout_za" nounwin
; CHECK-NEWLOWERING-LABEL: loop_with_external_entry:
; CHECK-NEWLOWERING: // %bb.0: // %entry
; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
-; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEWLOWERING-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: mov x29, sp
; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16
; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1
@@ -923,23 +892,27 @@ define void @loop_with_external_entry(i1 %c1, i1 %c2) "aarch64_inout_za" nounwin
; CHECK-NEWLOWERING-NEXT: // %bb.1: // %init
; CHECK-NEWLOWERING-NEXT: bl shared_za_call
; CHECK-NEWLOWERING-NEXT: .LBB11_2: // %loop.preheader
-; CHECK-NEWLOWERING-NEXT: sub x8, x29, #16
-; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x8
+; CHECK-NEWLOWERING-NEXT: sub x20, x29, #16
+; CHECK-NEWLOWERING-NEXT: b .LBB11_4
; CHECK-NEWLOWERING-NEXT: .LBB11_3: // %loop
+; CHECK-NEWLOWERING-NEXT: // in Loop: Header=BB11_4 Depth=1
+; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
+; CHECK-NEWLOWERING-NEXT: tbz w19, #0, .LBB11_6
+; CHECK-NEWLOWERING-NEXT: .LBB11_4: // %loop
; CHECK-NEWLOWERING-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x20
; CHECK-NEWLOWERING-NEXT: bl private_za_call
-; CHECK-NEWLOWERING-NEXT: tbnz w19, #0, .LBB11_3
-; CHECK-NEWLOWERING-NEXT: // %bb.4: // %exit
; CHECK-NEWLOWERING-NEXT: smstart za
; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16
-; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB11_6
-; CHECK-NEWLOWERING-NEXT: // %bb.5: // %exit
+; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB11_3
+; CHECK-NEWLOWERING-NEXT: // %bb.5: // %loop
+; CHECK-NEWLOWERING-NEXT: // in Loop: Header=BB11_4 Depth=1
; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
+; CHECK-NEWLOWERING-NEXT: b .LBB11_3
; CHECK-NEWLOWERING-NEXT: .LBB11_6: // %exit
-; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT: mov sp, x29
-; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEWLOWERING-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll b/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll
index 3f35cb5..dcdc56c 100644
--- a/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll
+++ b/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll
@@ -63,25 +63,17 @@ define void @za_with_raii(i1 %fail) "aarch64_inout_za" personality ptr @__gxx_pe
; CHECK-NEXT: ldr x1, [x1, :got_lo12:typeinfo_for_char_const_ptr]
; CHECK-NEXT: bl __cxa_throw
; CHECK-NEXT: .Ltmp1: // EH_LABEL
-; CHECK-NEXT: smstart za
-; CHECK-NEXT: mrs x8, TPIDR2_EL0
-; CHECK-NEXT: sub x0, x29, #16
-; CHECK-NEXT: cbnz x8, .LBB0_4
-; CHECK-NEXT: // %bb.3: // %throw_exception
-; CHECK-NEXT: bl __arm_tpidr2_restore
-; CHECK-NEXT: .LBB0_4: // %throw_exception
-; CHECK-NEXT: msr TPIDR2_EL0, xzr
-; CHECK-NEXT: // %bb.5: // %throw_fail
-; CHECK-NEXT: .LBB0_6: // %unwind_dtors
+; CHECK-NEXT: // %bb.3: // %throw_fail
+; CHECK-NEXT: .LBB0_4: // %unwind_dtors
; CHECK-NEXT: .Ltmp2: // EH_LABEL
; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
-; CHECK-NEXT: cbnz x8, .LBB0_8
-; CHECK-NEXT: // %bb.7: // %unwind_dtors
+; CHECK-NEXT: cbnz x8, .LBB0_6
+; CHECK-NEXT: // %bb.5: // %unwind_dtors
; CHECK-NEXT: bl __arm_tpidr2_restore
-; CHECK-NEXT: .LBB0_8: // %unwind_dtors
+; CHECK-NEXT: .LBB0_6: // %unwind_dtors
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: bl shared_za_call
; CHECK-NEXT: sub x8, x29, #16
diff --git a/llvm/test/CodeGen/AArch64/sme-za-function-with-many-blocks.ll b/llvm/test/CodeGen/AArch64/sme-za-function-with-many-blocks.ll
new file mode 100644
index 0000000..0306b27
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-za-function-with-many-blocks.ll
@@ -0,0 +1,296 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -aarch64-new-sme-abi < %s | FileCheck %s
+
+; This test case was generated by lowering mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul.mlir to LLVM IR.
+; The actual contents of the function are not that important. The main interesting quality here is that many blocks
+; don't directly use ZA. The only blocks that require ZA are the MOPA (and loads/stores) in the inner loop, and the
+; `printMemrefF32()` call in the exit block.
+;
+; If ZA states are not propagated in the MachineSMEABIPass, block %48 (which is within the outer loop) will
+; have an edge to block %226 (the exit block), which requires ZA in the "saved" state, and an edge to block %51
+; (which has no preference on ZA state). This means block %48 will also end up in the locally saved state.
+; This is not really what we want, as it means we will save/restore ZA in the outer loop. We can fix this by
+; propagating the "active" state from the inner loop through basic blocks with no preference, to ensure the outer
+; loop is in the "active" state too.
+;
+; If done correctly, the only ZA save/restore should be in the exit block (with all other blocks in the active state).
+
+define void @matmul(ptr %0, ptr %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6, ptr %7, ptr %8, i64 %9, i64 %10, i64 %11, i64 %12, i64 %13, ptr %14, ptr %15, i64 %16, i64 %17, i64 %18, i64 %19, i64 %20) #0 {
+; Check for a ZA zero in the entry block, then no uses of TPIDR2_EL0 (for ZA saves/restores)
+; until the exit block (which contains the call to printMemrefF32).
+;
+; CHECK-LABEL: matmul:
+; CHECK: zero {za}
+; CHECK-NOT: TPIDR2_EL0
+; CHECK: msr TPIDR2_EL0, x{{.*}}
+; CHECK-NOT: .LBB{{.*}}
+; CHECK: bl printMemrefF32
+ %22 = insertvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } poison, ptr %14, 0
+ %23 = insertvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %22, ptr %15, 1
+ %24 = insertvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %23, i64 %16, 2
+ %25 = insertvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %24, i64 %17, 3, 0
+ %26 = insertvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %25, i64 %19, 4, 0
+ %27 = insertvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %26, i64 %18, 3, 1
+ %28 = insertvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %27, i64 %20, 4, 1
+ %29 = insertvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } poison, ptr %7, 0
+ %30 = insertvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %29, ptr %8, 1
+ %31 = insertvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %30, i64 %9, 2
+ %32 = insertvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %31, i64 %10, 3, 0
+ %33 = insertvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %32, i64 %12, 4, 0
+ %34 = insertvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %33, i64 %11, 3, 1
+ %35 = insertvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %34, i64 %13, 4, 1
+ %36 = insertvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } poison, ptr %0, 0
+ %37 = insertvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %36, ptr %1, 1
+ %38 = insertvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %37, i64 %2, 2
+ %39 = insertvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %38, i64 %3, 3, 0
+ %40 = insertvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %39, i64 %5, 4, 0
+ %41 = insertvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %40, i64 %4, 3, 1
+ %42 = insertvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %41, i64 %6, 4, 1
+ %43 = extractvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %42, 3, 0
+ %44 = extractvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %42, 3, 1
+ %45 = extractvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %35, 3, 1
+ %46 = call i64 @llvm.vscale.i64()
+ %47 = mul i64 %46, 4
+ br label %48
+
+48: ; preds = %224, %21
+ %49 = phi i64 [ %225, %224 ], [ 0, %21 ]
+ %50 = icmp slt i64 %49, %43
+ br i1 %50, label %51, label %226
+
+51: ; preds = %48
+ %52 = sub i64 %43, %49
+ %53 = call i64 @llvm.smin.i64(i64 %47, i64 %52)
+ %54 = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
+ %55 = trunc i64 %53 to i32
+ %56 = insertelement <vscale x 4 x i32> poison, i32 %55, i32 0
+ %57 = shufflevector <vscale x 4 x i32> %56, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+ %58 = icmp slt <vscale x 4 x i32> %54, %57
+ br label %59
+
+59: ; preds = %222, %51
+ %60 = phi i64 [ %223, %222 ], [ 0, %51 ]
+ %61 = icmp slt i64 %60, %45
+ br i1 %61, label %62, label %224
+
+62: ; preds = %59
+ %63 = sub i64 %45, %60
+ %64 = call i64 @llvm.smin.i64(i64 %47, i64 %63)
+ %65 = extractvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %28, 0
+ %66 = extractvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %28, 1
+ %67 = insertvalue { ptr, ptr, i64 } poison, ptr %65, 0
+ %68 = insertvalue { ptr, ptr, i64 } %67, ptr %66, 1
+ %69 = insertvalue { ptr, ptr, i64 } %68, i64 0, 2
+ %70 = extractvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %28, 2
+ %71 = extractvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %28, 3, 0
+ %72 = extractvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %28, 3, 1
+ %73 = extractvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %28, 4, 0
+ %74 = extractvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %28, 4, 1
+ %75 = mul nsw i64 %49, %73
+ %76 = add i64 %70, %75
+ %77 = mul nsw i64 %60, %74
+ %78 = add i64 %76, %77
+ %79 = extractvalue { ptr, ptr, i64 } %69, 0
+ %80 = extractvalue { ptr, ptr, i64 } %69, 1
+ %81 = insertvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } poison, ptr %79, 0
+ %82 = insertvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %81, ptr %80, 1
+ %83 = insertvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %82, i64 %78, 2
+ %84 = insertvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %83, i64 %53, 3, 0
+ %85 = insertvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %84, i64 %73, 4, 0
+ %86 = insertvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %85, i64 %64, 3, 1
+ %87 = insertvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %86, i64 %74, 4, 1
+ %88 = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
+ %89 = trunc i64 %64 to i32
+ %90 = insertelement <vscale x 4 x i32> poison, i32 %89, i32 0
+ %91 = shufflevector <vscale x 4 x i32> %90, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+ %92 = icmp slt <vscale x 4 x i32> %88, %91
+ br label %93
+
+93: ; preds = %220, %62
+ %94 = phi i64 [ %221, %220 ], [ 0, %62 ]
+ %95 = icmp slt i64 %94, %44
+ br i1 %95, label %96, label %222
+
+96: ; preds = %93
+ %97 = extractvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %42, 0
+ %98 = extractvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %42, 1
+ %99 = insertvalue { ptr, ptr, i64 } poison, ptr %97, 0
+ %100 = insertvalue { ptr, ptr, i64 } %99, ptr %98, 1
+ %101 = insertvalue { ptr, ptr, i64 } %100, i64 0, 2
+ %102 = extractvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %42, 2
+ %103 = extractvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %42, 3, 0
+ %104 = extractvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %42, 3, 1
+ %105 = extractvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %42, 4, 0
+ %106 = extractvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %42, 4, 1
+ %107 = mul nsw i64 %49, %105
+ %108 = add i64 %102, %107
+ %109 = mul nsw i64 %94, %106
+ %110 = add i64 %108, %109
+ %111 = extractvalue { ptr, ptr, i64 } %101, 0
+ %112 = extractvalue { ptr, ptr, i64 } %101, 1
+ %113 = insertvalue { ptr, ptr, i64, [1 x i64], [1 x i64] } poison, ptr %111, 0
+ %114 = insertvalue { ptr, ptr, i64, [1 x i64], [1 x i64] } %113, ptr %112, 1
+ %115 = insertvalue { ptr, ptr, i64, [1 x i64], [1 x i64] } %114, i64 %110, 2
+ %116 = insertvalue { ptr, ptr, i64, [1 x i64], [1 x i64] } %115, i64 %53, 3, 0
+ %117 = insertvalue { ptr, ptr, i64, [1 x i64], [1 x i64] } %116, i64 %105, 4, 0
+ br label %118
+
+118: ; preds = %133, %96
+ %119 = phi i64 [ %135, %133 ], [ 0, %96 ]
+ %120 = phi <vscale x 4 x float> [ %134, %133 ], [ poison, %96 ]
+ %121 = icmp slt i64 %119, %47
+ br i1 %121, label %122, label %136
+
+122: ; preds = %118
+ %123 = extractelement <vscale x 4 x i1> %58, i64 %119
+ br i1 %123, label %124, label %133
+
+124: ; preds = %122
+ %125 = extractvalue { ptr, ptr, i64, [1 x i64], [1 x i64] } %117, 1
+ %126 = extractvalue { ptr, ptr, i64, [1 x i64], [1 x i64] } %117, 2
+ %127 = getelementptr float, ptr %125, i64 %126
+ %128 = extractvalue { ptr, ptr, i64, [1 x i64], [1 x i64] } %117, 4, 0
+ %129 = mul nuw nsw i64 %119, %128
+ %130 = getelementptr inbounds nuw float, ptr %127, i64 %129
+ %131 = load float, ptr %130, align 4
+ %132 = insertelement <vscale x 4 x float> %120, float %131, i64 %119
+ br label %133
+
+133: ; preds = %124, %122
+ %134 = phi <vscale x 4 x float> [ %132, %124 ], [ %120, %122 ]
+ %135 = add i64 %119, 1
+ br label %118
+
+136: ; preds = %118
+ %137 = extractvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %35, 0
+ %138 = extractvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %35, 1
+ %139 = insertvalue { ptr, ptr, i64 } poison, ptr %137, 0
+ %140 = insertvalue { ptr, ptr, i64 } %139, ptr %138, 1
+ %141 = insertvalue { ptr, ptr, i64 } %140, i64 0, 2
+ %142 = extractvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %35, 2
+ %143 = extractvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %35, 3, 0
+ %144 = extractvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %35, 3, 1
+ %145 = extractvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %35, 4, 0
+ %146 = extractvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %35, 4, 1
+ %147 = mul nsw i64 %94, %145
+ %148 = add i64 %142, %147
+ %149 = mul nsw i64 %60, %146
+ %150 = add i64 %148, %149
+ %151 = extractvalue { ptr, ptr, i64 } %141, 0
+ %152 = extractvalue { ptr, ptr, i64 } %141, 1
+ %153 = insertvalue { ptr, ptr, i64, [1 x i64], [1 x i64] } poison, ptr %151, 0
+ %154 = insertvalue { ptr, ptr, i64, [1 x i64], [1 x i64] } %153, ptr %152, 1
+ %155 = insertvalue { ptr, ptr, i64, [1 x i64], [1 x i64] } %154, i64 %150, 2
+ %156 = insertvalue { ptr, ptr, i64, [1 x i64], [1 x i64] } %155, i64 %64, 3, 0
+ %157 = insertvalue { ptr, ptr, i64, [1 x i64], [1 x i64] } %156, i64 %146, 4, 0
+ br label %158
+
+158: ; preds = %173, %136
+ %159 = phi i64 [ %175, %173 ], [ 0, %136 ]
+ %160 = phi <vscale x 4 x float> [ %174, %173 ], [ poison, %136 ]
+ %161 = icmp slt i64 %159, %47
+ br i1 %161, label %162, label %176
+
+162: ; preds = %158
+ %163 = extractelement <vscale x 4 x i1> %92, i64 %159
+ br i1 %163, label %164, label %173
+
+164: ; preds = %162
+ %165 = extractvalue { ptr, ptr, i64, [1 x i64], [1 x i64] } %157, 1
+ %166 = extractvalue { ptr, ptr, i64, [1 x i64], [1 x i64] } %157, 2
+ %167 = getelementptr float, ptr %165, i64 %166
+ %168 = extractvalue { ptr, ptr, i64, [1 x i64], [1 x i64] } %157, 4, 0
+ %169 = mul nuw nsw i64 %159, %168
+ %170 = getelementptr inbounds nuw float, ptr %167, i64 %169
+ %171 = load float, ptr %170, align 4
+ %172 = insertelement <vscale x 4 x float> %160, float %171, i64 %159
+ br label %173
+
+173: ; preds = %164, %162
+ %174 = phi <vscale x 4 x float> [ %172, %164 ], [ %160, %162 ]
+ %175 = add i64 %159, 1
+ br label %158
+
+176: ; preds = %158
+ %177 = trunc i64 %64 to i32
+ br label %178
+
+178: ; preds = %181, %176
+ %179 = phi i64 [ %202, %181 ], [ 0, %176 ]
+ %180 = icmp slt i64 %179, %47
+ br i1 %180, label %181, label %203
+
+181: ; preds = %178
+ %182 = icmp ult i64 %179, %53
+ %183 = sext i1 %182 to i32
+ %184 = and i32 %183, %177
+ %185 = sext i32 %184 to i64
+ %186 = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
+ %187 = trunc i64 %185 to i32
+ %188 = insertelement <vscale x 4 x i32> poison, i32 %187, i32 0
+ %189 = shufflevector <vscale x 4 x i32> %188, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+ %190 = icmp slt <vscale x 4 x i32> %186, %189
+ %191 = extractvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %87, 1
+ %192 = extractvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %87, 2
+ %193 = getelementptr float, ptr %191, i64 %192
+ %194 = extractvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %87, 4, 0
+ %195 = mul i64 %179, %194
+ %196 = extractvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %87, 4, 1
+ %197 = mul i64 0, %196
+ %198 = add i64 %195, %197
+ %199 = getelementptr float, ptr %193, i64 %198
+ %200 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr %199, i32 4, <vscale x 4 x i1> %190, <vscale x 4 x float> poison)
+ %201 = trunc i64 %179 to i32
+ call void @llvm.aarch64.sme.write.horiz.nxv4f32(i32 0, i32 %201, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> %200)
+ %202 = add i64 %179, 1
+ br label %178
+
+203: ; preds = %178
+ call void @llvm.aarch64.sme.mopa.nxv4f32(i32 0, <vscale x 4 x i1> %58, <vscale x 4 x i1> %92, <vscale x 4 x float> %120, <vscale x 4 x float> %160)
+ %204 = call i64 @llvm.smin.i64(i64 %53, i64 %47)
+ br label %205
+
+205: ; preds = %208, %203
+ %206 = phi i64 [ %219, %208 ], [ 0, %203 ]
+ %207 = icmp slt i64 %206, %204
+ br i1 %207, label %208, label %220
+
+208: ; preds = %205
+ %209 = extractvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %87, 1
+ %210 = extractvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %87, 2
+ %211 = getelementptr float, ptr %209, i64 %210
+ %212 = extractvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %87, 4, 0
+ %213 = mul i64 %206, %212
+ %214 = extractvalue { ptr, ptr, i64, [2 x i64], [2 x i64] } %87, 4, 1
+ %215 = mul i64 0, %214
+ %216 = add i64 %213, %215
+ %217 = getelementptr float, ptr %211, i64 %216
+ %218 = trunc i64 %206 to i32
+ call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> %92, ptr %217, i32 0, i32 %218)
+ %219 = add i64 %206, 1
+ br label %205
+
+220: ; preds = %205
+ %221 = add i64 %94, 1
+ br label %93
+
+222: ; preds = %93
+ %223 = add i64 %60, %47
+ br label %59
+
+224: ; preds = %59
+ %225 = add i64 %49, %47
+ br label %48
+
+226: ; preds = %48
+ %227 = alloca { ptr, ptr, i64, [2 x i64], [2 x i64] }, i64 1, align 8
+ store { ptr, ptr, i64, [2 x i64], [2 x i64] } %28, ptr %227, align 8
+ %228 = insertvalue { i64, ptr } { i64 2, ptr poison }, ptr %227, 1
+ %229 = extractvalue { i64, ptr } %228, 0
+ %230 = extractvalue { i64, ptr } %228, 1
+ call void @printMemrefF32(i64 %229, ptr %230)
+ ret void
+}
+
+declare void @printMemrefF32(i64, ptr)
+
+attributes #0 = { "aarch64_new_za" "aarch64_pstate_sm_body" }
diff --git a/llvm/test/CodeGen/AArch64/sme-za-lazy-save-buffer.ll b/llvm/test/CodeGen/AArch64/sme-za-lazy-save-buffer.ll
index 066ee3b..afd56d1 100644
--- a/llvm/test/CodeGen/AArch64/sme-za-lazy-save-buffer.ll
+++ b/llvm/test/CodeGen/AArch64/sme-za-lazy-save-buffer.ll
@@ -12,77 +12,41 @@ entry:
}
define float @multi_bb_stpidr2_save_required(i32 %a, float %b, float %c) "aarch64_inout_za" {
-; CHECK-LABEL: multi_bb_stpidr2_save_required:
-; CHECK: // %bb.0:
-; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT: mov x29, sp
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa w29, 16
-; CHECK-NEXT: .cfi_offset w30, -8
-; CHECK-NEXT: .cfi_offset w29, -16
-; CHECK-NEXT: rdsvl x8, #1
-; CHECK-NEXT: mov x9, sp
-; CHECK-NEXT: msub x9, x8, x8, x9
-; CHECK-NEXT: mov sp, x9
-; CHECK-NEXT: stp x9, x8, [x29, #-16]
-; CHECK-NEXT: cbz w0, .LBB1_2
-; CHECK-NEXT: // %bb.1: // %use_b
-; CHECK-NEXT: fmov s1, #4.00000000
-; CHECK-NEXT: fadd s0, s0, s1
-; CHECK-NEXT: b .LBB1_5
-; CHECK-NEXT: .LBB1_2: // %use_c
-; CHECK-NEXT: fmov s0, s1
-; CHECK-NEXT: sub x8, x29, #16
-; CHECK-NEXT: msr TPIDR2_EL0, x8
-; CHECK-NEXT: bl cosf
-; CHECK-NEXT: smstart za
-; CHECK-NEXT: mrs x8, TPIDR2_EL0
-; CHECK-NEXT: sub x0, x29, #16
-; CHECK-NEXT: cbnz x8, .LBB1_4
-; CHECK-NEXT: // %bb.3: // %use_c
-; CHECK-NEXT: bl __arm_tpidr2_restore
-; CHECK-NEXT: .LBB1_4: // %use_c
-; CHECK-NEXT: msr TPIDR2_EL0, xzr
-; CHECK-NEXT: .LBB1_5: // %exit
-; CHECK-NEXT: mov sp, x29
-; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
-; CHECK-NEXT: ret
-;
-; CHECK-NEWLOWERING-LABEL: multi_bb_stpidr2_save_required:
-; CHECK-NEWLOWERING: // %bb.0:
-; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEWLOWERING-NEXT: mov x29, sp
-; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16
-; CHECK-NEWLOWERING-NEXT: .cfi_def_cfa w29, 16
-; CHECK-NEWLOWERING-NEXT: .cfi_offset w30, -8
-; CHECK-NEWLOWERING-NEXT: .cfi_offset w29, -16
-; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1
-; CHECK-NEWLOWERING-NEXT: mov x9, sp
-; CHECK-NEWLOWERING-NEXT: msub x9, x8, x8, x9
-; CHECK-NEWLOWERING-NEXT: mov sp, x9
-; CHECK-NEWLOWERING-NEXT: sub x10, x29, #16
-; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-16]
-; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x10
-; CHECK-NEWLOWERING-NEXT: cbz w0, .LBB1_2
-; CHECK-NEWLOWERING-NEXT: // %bb.1: // %use_b
-; CHECK-NEWLOWERING-NEXT: fmov s1, #4.00000000
-; CHECK-NEWLOWERING-NEXT: fadd s0, s0, s1
-; CHECK-NEWLOWERING-NEXT: b .LBB1_3
-; CHECK-NEWLOWERING-NEXT: .LBB1_2: // %use_c
-; CHECK-NEWLOWERING-NEXT: fmov s0, s1
-; CHECK-NEWLOWERING-NEXT: bl cosf
-; CHECK-NEWLOWERING-NEXT: .LBB1_3: // %exit
-; CHECK-NEWLOWERING-NEXT: smstart za
-; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
-; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16
-; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB1_5
-; CHECK-NEWLOWERING-NEXT: // %bb.4: // %exit
-; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
-; CHECK-NEWLOWERING-NEXT: .LBB1_5: // %exit
-; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
-; CHECK-NEWLOWERING-NEXT: mov sp, x29
-; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
-; CHECK-NEWLOWERING-NEXT: ret
+; CHECK-COMMON-LABEL: multi_bb_stpidr2_save_required:
+; CHECK-COMMON: // %bb.0:
+; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: mov x29, sp
+; CHECK-COMMON-NEXT: sub sp, sp, #16
+; CHECK-COMMON-NEXT: .cfi_def_cfa w29, 16
+; CHECK-COMMON-NEXT: .cfi_offset w30, -8
+; CHECK-COMMON-NEXT: .cfi_offset w29, -16
+; CHECK-COMMON-NEXT: rdsvl x8, #1
+; CHECK-COMMON-NEXT: mov x9, sp
+; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
+; CHECK-COMMON-NEXT: mov sp, x9
+; CHECK-COMMON-NEXT: stp x9, x8, [x29, #-16]
+; CHECK-COMMON-NEXT: cbz w0, .LBB1_2
+; CHECK-COMMON-NEXT: // %bb.1: // %use_b
+; CHECK-COMMON-NEXT: fmov s1, #4.00000000
+; CHECK-COMMON-NEXT: fadd s0, s0, s1
+; CHECK-COMMON-NEXT: b .LBB1_5
+; CHECK-COMMON-NEXT: .LBB1_2: // %use_c
+; CHECK-COMMON-NEXT: fmov s0, s1
+; CHECK-COMMON-NEXT: sub x8, x29, #16
+; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x8
+; CHECK-COMMON-NEXT: bl cosf
+; CHECK-COMMON-NEXT: smstart za
+; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
+; CHECK-COMMON-NEXT: sub x0, x29, #16
+; CHECK-COMMON-NEXT: cbnz x8, .LBB1_4
+; CHECK-COMMON-NEXT: // %bb.3: // %use_c
+; CHECK-COMMON-NEXT: bl __arm_tpidr2_restore
+; CHECK-COMMON-NEXT: .LBB1_4: // %use_c
+; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
+; CHECK-COMMON-NEXT: .LBB1_5: // %exit
+; CHECK-COMMON-NEXT: mov sp, x29
+; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: ret
%cmp = icmp ne i32 %a, 0
br i1 %cmp, label %use_b, label %use_c
@@ -155,7 +119,9 @@ define float @multi_bb_stpidr2_save_required_stackprobe(i32 %a, float %b, float
; CHECK-NEWLOWERING-NEXT: .cfi_offset w29, -16
; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1
; CHECK-NEWLOWERING-NEXT: mov x9, sp
+; CHECK-NEWLOWERING-NEXT: sub x10, x29, #16
; CHECK-NEWLOWERING-NEXT: msub x9, x8, x8, x9
+; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x10
; CHECK-NEWLOWERING-NEXT: .LBB2_1: // =>This Inner Loop Header: Depth=1
; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEWLOWERING-NEXT: cmp sp, x9
@@ -166,9 +132,7 @@ define float @multi_bb_stpidr2_save_required_stackprobe(i32 %a, float %b, float
; CHECK-NEWLOWERING-NEXT: .LBB2_3:
; CHECK-NEWLOWERING-NEXT: mov sp, x9
; CHECK-NEWLOWERING-NEXT: ldr xzr, [sp]
-; CHECK-NEWLOWERING-NEXT: sub x10, x29, #16
; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-16]
-; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x10
; CHECK-NEWLOWERING-NEXT: cbz w0, .LBB2_5
; CHECK-NEWLOWERING-NEXT: // %bb.4: // %use_b
; CHECK-NEWLOWERING-NEXT: fmov s1, #4.00000000
diff --git a/llvm/test/CodeGen/AArch64/sme-zt0-state.ll b/llvm/test/CodeGen/AArch64/sme-zt0-state.ll
index 2583a93..5b81f5d 100644
--- a/llvm/test/CodeGen/AArch64/sme-zt0-state.ll
+++ b/llvm/test/CodeGen/AArch64/sme-zt0-state.ll
@@ -426,3 +426,21 @@ define void @zt0_multiple_private_za_calls(ptr %callee) "aarch64_in_zt0" nounwin
call void %callee()
ret void
}
+
+define void @disable_tailcallopt(ptr %callee) "aarch64_inout_zt0" nounwind {
+; CHECK-COMMON-LABEL: disable_tailcallopt:
+; CHECK-COMMON: // %bb.0:
+; CHECK-COMMON-NEXT: sub sp, sp, #80
+; CHECK-COMMON-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-COMMON-NEXT: mov x19, sp
+; CHECK-COMMON-NEXT: str zt0, [x19]
+; CHECK-COMMON-NEXT: smstop za
+; CHECK-COMMON-NEXT: blr x0
+; CHECK-COMMON-NEXT: smstart za
+; CHECK-COMMON-NEXT: ldr zt0, [x19]
+; CHECK-COMMON-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-COMMON-NEXT: add sp, sp, #80
+; CHECK-COMMON-NEXT: ret
+ tail call void %callee()
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-trunc.mir
index ae683ec..366b1ed 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-trunc.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-trunc.mir
@@ -137,10 +137,10 @@ body: |
bb.0:
; Test that trunc(trunc) is combined to a single trunc
; CHECK-LABEL: name: trunc_trunc
- ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s64)
+ ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](s32)
- %0:_(s64) = G_IMPLICIT_DEF
+ %0:_(s64) = COPY $sgpr0_sgpr1
%1:_(s48) = G_TRUNC %0
%2:_(s32) = G_TRUNC %1
$vgpr0 = COPY %2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-abs.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-abs.mir
index 8b19d7d..88cb1ab 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-abs.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-abs.mir
@@ -345,36 +345,20 @@ body: |
; SI-LABEL: name: test_abs_v3s16
; SI: liveins: $vgpr0
; SI-NEXT: {{ $}}
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16
- ; SI-NEXT: [[ABS:%[0-9]+]]:_(s32) = G_ABS [[SEXT_INREG]]
- ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16
- ; SI-NEXT: [[ABS1:%[0-9]+]]:_(s32) = G_ABS [[SEXT_INREG1]]
- ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16
- ; SI-NEXT: [[ABS2:%[0-9]+]]:_(s32) = G_ABS [[SEXT_INREG2]]
+ ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[ABS:%[0-9]+]]:_(s32) = G_ABS [[C]]
+ ; SI-NEXT: [[ABS1:%[0-9]+]]:_(s32) = G_ABS [[C]]
+ ; SI-NEXT: [[ABS2:%[0-9]+]]:_(s32) = G_ABS [[C]]
; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ABS]](s32), [[ABS1]](s32), [[ABS2]](s32)
; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
;
; VI-LABEL: name: test_abs_v3s16
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; VI-NEXT: [[ABS:%[0-9]+]]:_(s16) = G_ABS [[TRUNC]]
- ; VI-NEXT: [[ABS1:%[0-9]+]]:_(s16) = G_ABS [[TRUNC1]]
- ; VI-NEXT: [[ABS2:%[0-9]+]]:_(s16) = G_ABS [[TRUNC2]]
+ ; VI-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; VI-NEXT: [[ABS:%[0-9]+]]:_(s16) = G_ABS [[DEF]]
+ ; VI-NEXT: [[ABS1:%[0-9]+]]:_(s16) = G_ABS [[DEF]]
+ ; VI-NEXT: [[ABS2:%[0-9]+]]:_(s16) = G_ABS [[DEF]]
; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ABS]](s16)
; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ABS1]](s16)
; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ABS2]](s16)
@@ -384,18 +368,16 @@ body: |
; GFX9-LABEL: name: test_abs_v3s16
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[ABS:%[0-9]+]]:_(<2 x s16>) = G_ABS [[BUILD_VECTOR]]
+ ; GFX9-NEXT: [[ABS1:%[0-9]+]]:_(s16) = G_ABS [[DEF]]
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[ABS]](<2 x s16>)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[ABS:%[0-9]+]]:_(<2 x s16>) = G_ABS [[UV]]
- ; GFX9-NEXT: [[ABS1:%[0-9]+]]:_(s16) = G_ABS [[TRUNC]]
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[ABS]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ABS1]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST1]](s32), [[LSHR]](s32), [[ANYEXT]](s32)
- ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[ANYEXT]](s32)
+ ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR1]](<3 x s32>)
%0:_(<3 x s16>) = G_IMPLICIT_DEF
%1:_(<3 x s16>) = G_ABS %0
%2:_(<3 x s32>) = G_ANYEXT %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir
index c734711..83ca323 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir
@@ -373,22 +373,17 @@ body: |
bb.0:
; CHECK-LABEL: name: test_and_v5s32
- ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>)
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32)
- ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<5 x s32>)
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32)
; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR2]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR3]]
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[UV9]]
- ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](<2 x s32>)
- ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](<2 x s32>)
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF2]](<8 x s32>)
- ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[AND2]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](<2 x s32>)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](<2 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[AND2]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR4]](<8 x s32>)
%0:_(<5 x s32>) = G_IMPLICIT_DEF
%1:_(<5 x s32>) = G_IMPLICIT_DEF
@@ -456,24 +451,12 @@ body: |
; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV3]](<2 x s16>), [[UV4]](<2 x s16>)
; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]]
- ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]]
- ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV6]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV6]](<2 x s16>), [[UV7]](<2 x s16>), [[BITCAST]](<2 x s16>)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
@@ -511,63 +494,62 @@ body: |
bb.0:
; CHECK-LABEL: name: test_and_v5s16
- ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<6 x s16>)
- ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>)
- ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV3]](<2 x s16>), [[UV4]](<2 x s16>)
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL2]]
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL3]]
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
- ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]]
- ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
- ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32)
+ ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL4]]
+ ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL5]]
+ ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
- ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]]
- ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
- ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL3]]
- ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL6]]
+ ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+ ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL7]]
+ ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>)
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(<4 x s16>) = G_AND [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]]
- ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND3]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32)
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>)
- ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST11]], [[C]](s32)
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
- ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32)
- ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]]
- ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
- ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]]
- ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32)
- ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL5]]
- ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR4]], [[C]](s32)
- ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL6]]
- ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>), [[UV13]](<2 x s16>)
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]]
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND1]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C1]](s32)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]]
+ ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL8]]
+ ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C2]]
+ ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C2]]
+ ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL9]]
+ ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
+ ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL10]]
+ ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
+ ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL11]]
+ ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<8 x s16>)
%0:_(<5 x s16>) = G_IMPLICIT_DEF
%1:_(<5 x s16>) = G_IMPLICIT_DEF
@@ -583,13 +565,10 @@ body: |
bb.0:
; CHECK-LABEL: name: test_and_v3s8
- ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<4 x s32>)
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[UV4]]
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[UV5]]
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[UV6]]
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
%0:_(<3 x s8>) = G_IMPLICIT_DEF
@@ -605,14 +584,11 @@ body: |
bb.0:
; CHECK-LABEL: name: test_and_v4s8
- ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
- ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<4 x s32>)
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[UV4]]
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[UV5]]
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[UV6]]
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[UV7]]
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
%0:_(<4 x s8>) = G_IMPLICIT_DEF
@@ -628,18 +604,15 @@ body: |
bb.0:
; CHECK-LABEL: name: test_and_v8s8
- ; CHECK: [[DEF:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<8 x s32>)
- ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<8 x s32>)
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[UV8]]
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[UV9]]
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[UV10]]
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[UV11]]
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[UV12]]
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[UV13]]
- ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[UV14]]
- ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[UV15]]
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32), [[AND4]](s32), [[AND5]](s32), [[AND6]](s32), [[AND7]](s32)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
%0:_(<8 x s8>) = G_IMPLICIT_DEF
@@ -655,28 +628,23 @@ body: |
bb.0:
; CHECK-LABEL: name: test_and_v16s8
- ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
- ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<16 x s32>)
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[UV16]]
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[UV17]]
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[UV18]]
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[UV19]]
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[UV20]]
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[UV21]]
- ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[UV22]]
- ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[UV23]]
- ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
- ; CHECK-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<16 x s32>)
- ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV40]], [[UV56]]
- ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[UV41]], [[UV57]]
- ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UV42]], [[UV58]]
- ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UV43]], [[UV59]]
- ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV44]], [[UV60]]
- ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[UV45]], [[UV61]]
- ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[UV46]], [[UV62]]
- ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[UV47]], [[UV63]]
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[DEF]]
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32), [[AND4]](s32), [[AND5]](s32), [[AND6]](s32), [[AND7]](s32), [[AND8]](s32), [[AND9]](s32), [[AND10]](s32), [[AND11]](s32), [[AND12]](s32), [[AND13]](s32), [[AND14]](s32), [[AND15]](s32)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>)
%0:_(<16 x s8>) = G_IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir
index a585fa7..6ff6316 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir
@@ -649,9 +649,8 @@ body: |
; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[UV4]](s32)
; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV5]](s32)
; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[UV6]](s32)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64), [[UV10]](s64)
+ ; SI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64), [[DEF]](s64)
; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; VI-LABEL: name: test_ashr_v3s64_v3s32
@@ -664,9 +663,8 @@ body: |
; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[UV4]](s32)
; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV5]](s32)
; VI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[UV6]](s32)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64), [[UV10]](s64)
+ ; VI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64), [[DEF]](s64)
; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; GFX9PLUS-LABEL: name: test_ashr_v3s64_v3s32
@@ -679,9 +677,8 @@ body: |
; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[UV4]](s32)
; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV5]](s32)
; GFX9PLUS-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[UV6]](s32)
- ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX9PLUS-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64), [[UV10]](s64)
+ ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64), [[DEF]](s64)
; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
%0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
%1:_(<3 x s64>) = G_EXTRACT %0, 0
@@ -857,26 +854,20 @@ body: |
; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16
; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[AND1]](s32)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C1]]
; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]]
; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]]
- ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C1]]
- ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
- ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+ ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL1]]
- ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
- ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32)
- ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]]
- ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
+ ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL2]]
+ ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; VI-LABEL: name: test_ashr_v3s16_v3s16
@@ -902,27 +893,20 @@ body: |
; VI-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[TRUNC3]](s16)
; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC1]], [[TRUNC4]](s16)
; VI-NEXT: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC2]], [[TRUNC5]](s16)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR]](s16)
; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR1]](s16)
; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ASHR2]](s16)
- ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]]
- ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
+ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL2]]
+ ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX9PLUS-LABEL: name: test_ashr_v3s16_v3s16
@@ -945,17 +929,10 @@ body: |
; GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9PLUS-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9PLUS-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
- ; GFX9PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
- ; GFX9PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
- ; GFX9PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; GFX9PLUS-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
- ; GFX9PLUS-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32)
+ ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ASHR1]](s16), [[TRUNC4]](s16)
- ; GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[TRUNC6]](s16)
+ ; GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[ASHR1]](s16), [[DEF]](s16)
+ ; GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir
index 724d581..f7f7732 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir
@@ -233,9 +233,8 @@ body: |
bb.0:
; CHECK-LABEL: name: extract_vector_elt_0_v2i8_i32
- ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
%0:_(<2 x s8>) = G_IMPLICIT_DEF
%1:_(s32) = G_CONSTANT i32 0
@@ -270,9 +269,8 @@ body: |
bb.0:
; CHECK-LABEL: name: extract_vector_elt_0_v2i1_i32
- ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
%0:_(<2 x s1>) = G_IMPLICIT_DEF
%1:_(s32) = G_CONSTANT i32 0
@@ -288,9 +286,8 @@ body: |
bb.0:
; CHECK-LABEL: name: extract_vector_elt_0_v2i1_i1
- ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
%0:_(<2 x s1>) = G_IMPLICIT_DEF
%1:_(s1) = G_CONSTANT i1 false
@@ -983,9 +980,8 @@ body: |
; CHECK-LABEL: name: extract_vector_elt_0_v8i64
; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<8 x s64>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<8 x s64>)
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[UV]](s64)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[DEF]](s64)
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](s64)
%0:_(<8 x s64>) = G_IMPLICIT_DEF
%1:_(s32) = G_CONSTANT i32 0
@@ -1003,9 +999,8 @@ body: |
; CHECK-LABEL: name: extract_vector_elt_0_v16i64
; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x s64>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64), [[UV12:%[0-9]+]]:_(s64), [[UV13:%[0-9]+]]:_(s64), [[UV14:%[0-9]+]]:_(s64), [[UV15:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<16 x s64>)
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[UV]](s64)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[DEF]](s64)
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](s64)
%0:_(<16 x s64>) = G_IMPLICIT_DEF
%1:_(s32) = G_CONSTANT i32 0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir
index 6630300..d981769 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir
@@ -516,9 +516,8 @@ name: extract_s8_v4s8_offset0
body: |
bb.0:
; CHECK-LABEL: name: extract_s8_v4s8_offset0
- ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
- ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32)
%0:_(<4 x s8>) = G_IMPLICIT_DEF
%1:_(s8) = G_EXTRACT %0, 0
%2:_(s32) = G_ANYEXT %1
@@ -530,9 +529,8 @@ name: extract_s8_v4s8_offset8
body: |
bb.0:
; CHECK-LABEL: name: extract_s8_v4s8_offset8
- ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
- ; CHECK-NEXT: $vgpr0 = COPY [[UV1]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32)
%0:_(<4 x s8>) = G_IMPLICIT_DEF
%1:_(s8) = G_EXTRACT %0, 8
%2:_(s32) = G_ANYEXT %1
@@ -544,9 +542,8 @@ name: extract_s8_v4s8_offset16
body: |
bb.0:
; CHECK-LABEL: name: extract_s8_v4s8_offset16
- ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
- ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32)
%0:_(<4 x s8>) = G_IMPLICIT_DEF
%1:_(s8) = G_EXTRACT %0, 16
%2:_(s32) = G_ANYEXT %1
@@ -558,9 +555,8 @@ name: extract_s8_v4s8_offset24
body: |
bb.0:
; CHECK-LABEL: name: extract_s8_v4s8_offset24
- ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
- ; CHECK-NEXT: $vgpr0 = COPY [[UV3]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32)
%0:_(<4 x s8>) = G_IMPLICIT_DEF
%1:_(s8) = G_EXTRACT %0, 24
%2:_(s32) = G_ANYEXT %1
@@ -573,9 +569,8 @@ name: extract_s8_v3s8_offset16
body: |
bb.0:
; CHECK-LABEL: name: extract_s8_v3s8_offset16
- ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
- ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32)
%0:_(<3 x s8>) = G_IMPLICIT_DEF
%1:_(s8) = G_EXTRACT %0, 16
%2:_(s32) = G_ANYEXT %1
@@ -587,9 +582,8 @@ name: extract_s8_v5s1_offset4
body: |
bb.0:
; CHECK-LABEL: name: extract_s8_v5s1_offset4
- ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>)
- ; CHECK-NEXT: $vgpr0 = COPY [[UV4]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32)
%0:_(<5 x s1>) = G_IMPLICIT_DEF
%1:_(s1) = G_EXTRACT %0, 4
%2:_(s32) = G_ANYEXT %1
@@ -601,9 +595,8 @@ name: extract_v2s16_v4s16_offset32
body: |
bb.0:
; CHECK-LABEL: name: extract_v2s16_v4s16_offset32
- ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CHECK-NEXT: $vgpr0 = COPY [[UV1]](<2 x s16>)
+ ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](<2 x s16>)
%0:_(<4 x s16>) = G_IMPLICIT_DEF
%1:_(<2 x s16>) = G_EXTRACT %0, 32
$vgpr0 = COPY %1
@@ -614,9 +607,8 @@ name: extract_v2s16_v6s16_offset32
body: |
bb.0:
; CHECK-LABEL: name: extract_v2s16_v6s16_offset32
- ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; CHECK-NEXT: $vgpr0 = COPY [[UV1]](<2 x s16>)
+ ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](<2 x s16>)
%0:_(<6 x s16>) = G_IMPLICIT_DEF
%1:_(<2 x s16>) = G_EXTRACT %0, 32
$vgpr0 = COPY %1
@@ -868,10 +860,8 @@ body: |
bb.0:
; CHECK-LABEL: name: extract_s16_v3s16_offset0
- ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32)
%0:_(<3 x s16>) = G_IMPLICIT_DEF
%1:_(s16) = G_EXTRACT %0, 0
%2:_(s32) = G_ANYEXT %1
@@ -957,9 +947,12 @@ body: |
bb.0:
; CHECK-LABEL: name: extract_v2s16_v3s16_offset0
- ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
%0:_(<3 x s16>) = G_IMPLICIT_DEF
%1:_(<2 x s16>) = G_EXTRACT %0, 0
$vgpr0 = COPY %1
@@ -971,9 +964,12 @@ body: |
bb.0:
; CHECK-LABEL: name: extract_v2s16_v5s16_offset0
- ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
%0:_(<5 x s16>) = G_IMPLICIT_DEF
%1:_(<2 x s16>) = G_EXTRACT %0, 0
$vgpr0 = COPY %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir
index d21526a..571bd10 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir
@@ -266,85 +266,78 @@ body: |
bb.0:
; SI-LABEL: name: test_fabs_v3s16
- ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
- ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; SI-NEXT: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[UV]]
+ ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
+ ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY]](s32)
+ ; SI-NEXT: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BITCAST]]
; SI-NEXT: [[FABS1:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BITCAST1]]
; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>)
- ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32)
; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>)
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32)
- ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]]
+ ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]]
+ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32)
+ ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL1]]
; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
- ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]]
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C2]]
+ ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32)
+ ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]]
; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
+ ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL3]]
; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
; SI-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x s16>)
;
; VI-LABEL: name: test_fabs_v3s16
- ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
- ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; VI-NEXT: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[UV]]
+ ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
+ ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY]](s32)
+ ; VI-NEXT: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BITCAST]]
; VI-NEXT: [[FABS1:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BITCAST1]]
; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>)
- ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32)
; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>)
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32)
- ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]]
+ ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]]
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32)
+ ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL1]]
; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]]
+ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C2]]
+ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32)
+ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]]
; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
+ ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL3]]
; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
; VI-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX9-LABEL: name: test_fabs_v3s16
- ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BUILD_VECTOR]]
+ ; GFX9-NEXT: [[FABS1:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BUILD_VECTOR1]]
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF1]](s16)
- ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[UV]]
- ; GFX9-NEXT: [[FABS1:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BUILD_VECTOR]]
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC1]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>)
+ ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16)
+ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>)
; GFX9-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x s16>)
%0:_(<3 x s16>) = G_IMPLICIT_DEF
%1:_(<3 x s16>) = G_FABS %0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir
index b209ef3..b48f566 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir
@@ -425,27 +425,19 @@ body: |
; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16)
; SI-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT4]], [[FPEXT5]]
; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16)
- ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
- ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
- ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]]
- ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL1]]
+ ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; VI-LABEL: name: test_fadd_v3s16
@@ -471,27 +463,19 @@ body: |
; VI-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[TRUNC3]]
; VI-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[TRUNC4]]
; VI-NEXT: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[TRUNC5]]
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD]](s16)
; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16)
; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FADD2]](s16)
- ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]]
- ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL1]]
+ ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX9-LABEL: name: test_fadd_v3s16
@@ -500,35 +484,21 @@ body: |
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16)
; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<2 x s16>) = G_FADD [[UV]], [[UV3]]
- ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(<2 x s16>) = G_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FADD]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FADD1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32)
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16)
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>)
+ ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(<2 x s16>) = G_FADD [[UV1]], [[UV4]]
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[FADD]](<2 x s16>)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FADD1]](<2 x s16>)
+ ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir
index d8b0439..b95b696 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir
@@ -18,12 +18,14 @@ body: |
; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]]
; SI-NEXT: $vgpr0 = COPY [[FCANONICALIZE]](s32)
+ ;
; VI-LABEL: name: test_fcanonicalize_s32
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]]
; VI-NEXT: $vgpr0 = COPY [[FCANONICALIZE]](s32)
+ ;
; GFX9-LABEL: name: test_fcanonicalize_s32
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -45,11 +47,13 @@ body: |
; SI-NEXT: {{ $}}
; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; SI-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](s64)
+ ;
; VI-LABEL: name: test_fcanonicalize_s64
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; VI-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](s64)
+ ;
; GFX9-LABEL: name: test_fcanonicalize_s64
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -75,6 +79,7 @@ body: |
; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCANONICALIZE]](s32)
; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
; VI-LABEL: name: test_fcanonicalize_s16
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
@@ -83,6 +88,7 @@ body: |
; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]]
; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCANONICALIZE]](s16)
; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
; GFX9-LABEL: name: test_fcanonicalize_s16
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -113,6 +119,7 @@ body: |
; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]]
; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FCANONICALIZE]](s32), [[FCANONICALIZE1]](s32)
; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
; VI-LABEL: name: test_fcanonicalize_v2s32
; VI: liveins: $vgpr0_vgpr1
; VI-NEXT: {{ $}}
@@ -122,6 +129,7 @@ body: |
; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]]
; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FCANONICALIZE]](s32), [[FCANONICALIZE1]](s32)
; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
; GFX9-LABEL: name: test_fcanonicalize_v2s32
; GFX9: liveins: $vgpr0_vgpr1
; GFX9-NEXT: {{ $}}
@@ -152,6 +160,7 @@ body: |
; SI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]]
; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FCANONICALIZE]](s32), [[FCANONICALIZE1]](s32), [[FCANONICALIZE2]](s32)
; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+ ;
; VI-LABEL: name: test_fcanonicalize_v3s32
; VI: liveins: $vgpr0_vgpr1_vgpr2
; VI-NEXT: {{ $}}
@@ -162,6 +171,7 @@ body: |
; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]]
; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FCANONICALIZE]](s32), [[FCANONICALIZE1]](s32), [[FCANONICALIZE2]](s32)
; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+ ;
; GFX9-LABEL: name: test_fcanonicalize_v3s32
; GFX9: liveins: $vgpr0_vgpr1_vgpr2
; GFX9-NEXT: {{ $}}
@@ -192,6 +202,7 @@ body: |
; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[UV1]]
; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FCANONICALIZE]](s64), [[FCANONICALIZE1]](s64)
; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
; VI-LABEL: name: test_fcanonicalize_v2s64
; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
; VI-NEXT: {{ $}}
@@ -201,6 +212,7 @@ body: |
; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[UV1]]
; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FCANONICALIZE]](s64), [[FCANONICALIZE1]](s64)
; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
; GFX9-LABEL: name: test_fcanonicalize_v2s64
; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
@@ -242,6 +254,7 @@ body: |
; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+ ;
; VI-LABEL: name: test_fcanonicalize_v2s16
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
@@ -259,6 +272,7 @@ body: |
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+ ;
; GFX9-LABEL: name: test_fcanonicalize_v2s16
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -276,22 +290,14 @@ body: |
bb.0:
; SI-LABEL: name: test_fcanonicalize_v3s16
- ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
+ ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT]]
; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCANONICALIZE]](s32)
- ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
+ ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT1]]
; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCANONICALIZE1]](s32)
- ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16)
+ ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
; SI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FPEXT2]]
; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCANONICALIZE2]](s32)
; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
@@ -299,39 +305,30 @@ body: |
; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16)
; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32)
; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
+ ;
; VI-LABEL: name: test_fcanonicalize_v3s16
- ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]]
- ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]]
- ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]]
+ ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[DEF]]
+ ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[DEF]]
+ ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[DEF]]
; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCANONICALIZE]](s16)
; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCANONICALIZE1]](s16)
; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FCANONICALIZE2]](s16)
; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32)
; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
+ ;
; GFX9-LABEL: name: test_fcanonicalize_v3s16
- ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+ ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR]]
+ ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR1]]
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[FCANONICALIZE]](<2 x s16>)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF1]](s16)
- ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV]]
- ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR]]
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FCANONICALIZE]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FCANONICALIZE1]](<2 x s16>)
- ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST1]](s32), [[LSHR]](s32), [[BITCAST2]](s32)
- ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR1]](<3 x s32>)
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FCANONICALIZE1]](<2 x s16>)
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32)
+ ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR2]](<3 x s32>)
%0:_(<3 x s16>) = G_IMPLICIT_DEF
%1:_(<3 x s16>) = G_FCANONICALIZE %0
%2:_(<3 x s32>) = G_ANYEXT %1
@@ -382,6 +379,7 @@ body: |
; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
; VI-LABEL: name: test_fcanonicalize_v4s16
; VI: liveins: $vgpr0_vgpr1
; VI-NEXT: {{ $}}
@@ -412,6 +410,7 @@ body: |
; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
; GFX9-LABEL: name: test_fcanonicalize_v4s16
; GFX9: liveins: $vgpr0_vgpr1
; GFX9-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp.mir
index c25db579..cef4cd7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcmp.mir
@@ -19,6 +19,7 @@ body: |
; GFX7-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[C]](s32), [[COPY]]
; GFX7-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C]], [[COPY]]
; GFX7-NEXT: $vgpr0 = COPY [[SELECT]](s32)
+ ;
; GFX8-LABEL: name: test_fcmp_s32
; GFX8: liveins: $vgpr0
; GFX8-NEXT: {{ $}}
@@ -27,6 +28,7 @@ body: |
; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[C]](s32), [[COPY]]
; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C]], [[COPY]]
; GFX8-NEXT: $vgpr0 = COPY [[SELECT]](s32)
+ ;
; GFX9-LABEL: name: test_fcmp_s32
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -55,6 +57,7 @@ body: |
; GFX7-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[C]](s64), [[COPY]]
; GFX7-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[C]], [[COPY]]
; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64)
+ ;
; GFX8-LABEL: name: test_fcmp_s64
; GFX8: liveins: $vgpr0_vgpr1
; GFX8-NEXT: {{ $}}
@@ -63,6 +66,7 @@ body: |
; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[C]](s64), [[COPY]]
; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[C]], [[COPY]]
; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64)
+ ;
; GFX9-LABEL: name: test_fcmp_s64
; GFX9: liveins: $vgpr0_vgpr1
; GFX9-NEXT: {{ $}}
@@ -95,6 +99,7 @@ body: |
; GFX7-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[C]], [[TRUNC]]
; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16)
; GFX7-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
; GFX8-LABEL: name: test_fcmp_s16
; GFX8: liveins: $vgpr0
; GFX8-NEXT: {{ $}}
@@ -105,6 +110,7 @@ body: |
; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[C]], [[TRUNC]]
; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16)
; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
; GFX9-LABEL: name: test_fcmp_s16
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -141,6 +147,7 @@ body: |
; GFX7-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP1]](s1)
; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32)
; GFX7-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>)
+ ;
; GFX8-LABEL: name: test_fcmp_v2s32
; GFX8: liveins: $vgpr0_vgpr1
; GFX8-NEXT: {{ $}}
@@ -153,6 +160,7 @@ body: |
; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP1]](s1)
; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32)
; GFX8-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>)
+ ;
; GFX9-LABEL: name: test_fcmp_v2s32
; GFX9: liveins: $vgpr0_vgpr1
; GFX9-NEXT: {{ $}}
@@ -190,6 +198,7 @@ body: |
; GFX7-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP1]](s1)
; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32)
; GFX7-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>)
+ ;
; GFX8-LABEL: name: test_fcmp_v2s32_flags
; GFX8: liveins: $vgpr0_vgpr1
; GFX8-NEXT: {{ $}}
@@ -202,6 +211,7 @@ body: |
; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP1]](s1)
; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32)
; GFX8-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<2 x s32>)
+ ;
; GFX9-LABEL: name: test_fcmp_v2s32_flags
; GFX9: liveins: $vgpr0_vgpr1
; GFX9-NEXT: {{ $}}
@@ -231,43 +241,42 @@ body: |
; GFX7-LABEL: name: test_fcmp_v3s32
; GFX7: liveins: $vgpr0_vgpr1_vgpr2
; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+ ; GFX7-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
- ; GFX7-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<3 x s32>)
- ; GFX7-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
- ; GFX7-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV]](s32), [[UV3]]
- ; GFX7-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV1]](s32), [[UV4]]
- ; GFX7-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV2]](s32), [[UV5]]
+ ; GFX7-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+ ; GFX7-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[DEF]](s32), [[UV]]
+ ; GFX7-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[DEF]](s32), [[UV1]]
+ ; GFX7-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[DEF]](s32), [[UV2]]
; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP]](s1)
; GFX7-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP1]](s1)
; GFX7-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP2]](s1)
; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32)
; GFX7-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
+ ;
; GFX8-LABEL: name: test_fcmp_v3s32
; GFX8: liveins: $vgpr0_vgpr1_vgpr2
; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+ ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
- ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<3 x s32>)
- ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
- ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV]](s32), [[UV3]]
- ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV1]](s32), [[UV4]]
- ; GFX8-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV2]](s32), [[UV5]]
+ ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+ ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[DEF]](s32), [[UV]]
+ ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[DEF]](s32), [[UV1]]
+ ; GFX8-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[DEF]](s32), [[UV2]]
; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP]](s1)
; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP1]](s1)
; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP2]](s1)
; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32)
; GFX8-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
+ ;
; GFX9-LABEL: name: test_fcmp_v3s32
; GFX9: liveins: $vgpr0_vgpr1_vgpr2
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<3 x s32>)
- ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
- ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV]](s32), [[UV3]]
- ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV1]](s32), [[UV4]]
- ; GFX9-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UV2]](s32), [[UV5]]
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+ ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[DEF]](s32), [[UV]]
+ ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[DEF]](s32), [[UV1]]
+ ; GFX9-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[DEF]](s32), [[UV2]]
; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP]](s1)
; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP1]](s1)
; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP2]](s1)
@@ -305,6 +314,7 @@ body: |
; GFX7-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP3]](s1)
; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32)
; GFX7-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x s32>)
+ ;
; GFX8-LABEL: name: test_fcmp_v4s32
; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX8-NEXT: {{ $}}
@@ -323,6 +333,7 @@ body: |
; GFX8-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FCMP3]](s1)
; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32)
; GFX8-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<4 x s32>)
+ ;
; GFX9-LABEL: name: test_fcmp_v4s32
; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
@@ -383,6 +394,7 @@ body: |
; GFX7-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[UV1]], [[UV3]]
; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32)
; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
; GFX8-LABEL: name: test_icmp_v2s16
; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
; GFX8-NEXT: {{ $}}
@@ -407,6 +419,7 @@ body: |
; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[UV1]], [[UV3]]
; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32)
; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
; GFX9-LABEL: name: test_icmp_v2s16
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
; GFX9-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir
index a0c2994e..51a16b6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir
@@ -20,6 +20,7 @@ body: |
; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32)
; SI-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](s32)
; SI-NEXT: $vgpr0 = COPY [[INT1]](s32)
+ ;
; VI-LABEL: name: test_fcos_s32
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
@@ -29,6 +30,7 @@ body: |
; VI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32)
; VI-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](s32)
; VI-NEXT: $vgpr0 = COPY [[INT1]](s32)
+ ;
; GFX9-LABEL: name: test_fcos_s32
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -57,6 +59,7 @@ body: |
; SI-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s64)
; SI-NEXT: [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](s64)
; SI-NEXT: $vgpr0_vgpr1 = COPY [[INT1]](s64)
+ ;
; VI-LABEL: name: test_fcos_s64
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
@@ -66,6 +69,7 @@ body: |
; VI-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s64)
; VI-NEXT: [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](s64)
; VI-NEXT: $vgpr0_vgpr1 = COPY [[INT1]](s64)
+ ;
; GFX9-LABEL: name: test_fcos_s64
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -97,6 +101,7 @@ body: |
; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32)
; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
; VI-LABEL: name: test_fcos_s16
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
@@ -108,6 +113,7 @@ body: |
; VI-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](s16)
; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16)
; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
; GFX9-LABEL: name: test_fcos_s16
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -145,6 +151,7 @@ body: |
; SI-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT2]](s32)
; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT1]](s32), [[INT3]](s32)
; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
; VI-LABEL: name: test_fcos_v2s32
; VI: liveins: $vgpr0_vgpr1
; VI-NEXT: {{ $}}
@@ -159,6 +166,7 @@ body: |
; VI-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT2]](s32)
; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT1]](s32), [[INT3]](s32)
; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
; GFX9-LABEL: name: test_fcos_v2s32
; GFX9: liveins: $vgpr0_vgpr1
; GFX9-NEXT: {{ $}}
@@ -199,6 +207,7 @@ body: |
; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT4]](s32)
; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[INT1]](s32), [[INT3]](s32), [[INT5]](s32)
; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+ ;
; VI-LABEL: name: test_fcos_v3s32
; VI: liveins: $vgpr0_vgpr1_vgpr2
; VI-NEXT: {{ $}}
@@ -216,6 +225,7 @@ body: |
; VI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT4]](s32)
; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[INT1]](s32), [[INT3]](s32), [[INT5]](s32)
; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+ ;
; GFX9-LABEL: name: test_fcos_v3s32
; GFX9: liveins: $vgpr0_vgpr1_vgpr2
; GFX9-NEXT: {{ $}}
@@ -255,6 +265,7 @@ body: |
; SI-NEXT: [[INT3:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT2]](s64)
; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[INT1]](s64), [[INT3]](s64)
; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
; VI-LABEL: name: test_fcos_v2s64
; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
; VI-NEXT: {{ $}}
@@ -269,6 +280,7 @@ body: |
; VI-NEXT: [[INT3:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT2]](s64)
; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[INT1]](s64), [[INT3]](s64)
; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
; GFX9-LABEL: name: test_fcos_v2s64
; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
@@ -318,6 +330,7 @@ body: |
; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+ ;
; VI-LABEL: name: test_fcos_v2s16
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
@@ -340,6 +353,7 @@ body: |
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+ ;
; GFX9-LABEL: name: test_fcos_v2s16
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -367,28 +381,20 @@ body: |
bb.0:
; SI-LABEL: name: test_fcos_v3s16
- ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
- ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000
- ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[C1]]
+ ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000
+ ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[C]]
; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32)
; SI-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](s32)
; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32)
- ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
- ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT1]], [[C1]]
+ ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT1]], [[C]]
; SI-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](s32)
; SI-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT2]](s32)
; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT3]](s32)
- ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16)
- ; SI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[C1]]
+ ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; SI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[C]]
; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](s32)
; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT4]](s32)
; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT5]](s32)
@@ -397,24 +403,17 @@ body: |
; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16)
; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32)
; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
+ ;
; VI-LABEL: name: test_fcos_v3s16
- ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3118
- ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[C1]]
+ ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3118
+ ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[DEF]], [[C]]
; VI-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s16)
; VI-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](s16)
- ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[C1]]
+ ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[DEF]], [[C]]
; VI-NEXT: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](s16)
; VI-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT2]](s16)
- ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[C1]]
+ ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[DEF]], [[C]]
; VI-NEXT: [[INT4:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](s16)
; VI-NEXT: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT4]](s16)
; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16)
@@ -422,22 +421,15 @@ body: |
; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16)
; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32)
; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
+ ;
; GFX9-LABEL: name: test_fcos_v3s16
- ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3118
- ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[C1]]
+ ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3118
+ ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[DEF]], [[C]]
; GFX9-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL]](s16)
- ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[C1]]
+ ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[DEF]], [[C]]
; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL1]](s16)
- ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[C1]]
+ ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[DEF]], [[C]]
; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[FMUL2]](s16)
; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16)
; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16)
@@ -503,6 +495,7 @@ body: |
; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
; VI-LABEL: name: test_fcos_v4s16
; VI: liveins: $vgpr0_vgpr1
; VI-NEXT: {{ $}}
@@ -542,6 +535,7 @@ body: |
; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
; GFX9-LABEL: name: test_fcos_v4s16
; GFX9: liveins: $vgpr0_vgpr1
; GFX9-NEXT: {{ $}}
@@ -589,6 +583,7 @@ body: |
; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32)
; SI-NEXT: [[INT1:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](s32)
; SI-NEXT: $vgpr0 = COPY [[INT1]](s32)
+ ;
; VI-LABEL: name: test_fcos_s32_flags
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
@@ -598,6 +593,7 @@ body: |
; VI-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32)
; VI-NEXT: [[INT1:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.cos), [[INT]](s32)
; VI-NEXT: $vgpr0 = COPY [[INT1]](s32)
+ ;
; GFX9-LABEL: name: test_fcos_s32_flags
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir
index 3fa73c2..30b716a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir
@@ -1975,31 +1975,15 @@ body: |
; SI-LABEL: name: test_fdiv_v3s16
; SI: liveins: $vgpr0, $vgpr1
; SI-NEXT: {{ $}}
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
- ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
- ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16)
- ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; SI-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 0
; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 1
; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32)
; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]]
- ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]]
+ ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]]
; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]]
; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]]
; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]]
@@ -2008,13 +1992,13 @@ body: |
; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1)
; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32)
; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32)
- ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
- ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16)
+ ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
; SI-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](s32), [[FPEXT3]](s32), 0
; SI-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](s32), [[FPEXT3]](s32), 1
; SI-NEXT: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32)
; SI-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]]
- ; SI-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C1]]
+ ; SI-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]]
; SI-NEXT: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]]
; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]]
; SI-NEXT: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]]
@@ -2023,13 +2007,13 @@ body: |
; SI-NEXT: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1)
; SI-NEXT: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[FPEXT3]](s32), [[FPEXT2]](s32)
; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT13]](s32)
- ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16)
- ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16)
+ ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
; SI-NEXT: [[INT14:%[0-9]+]]:_(s32), [[INT15:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT4]](s32), [[FPEXT5]](s32), 0
; SI-NEXT: [[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT4]](s32), [[FPEXT5]](s32), 1
; SI-NEXT: [[INT18:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32)
; SI-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]]
- ; SI-NEXT: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C1]]
+ ; SI-NEXT: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C]]
; SI-NEXT: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], [[INT18]], [[INT18]]
; SI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]]
; SI-NEXT: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]]
@@ -2047,25 +2031,9 @@ body: |
; VI-LABEL: name: test_fdiv_v3s16
; VI: liveins: $vgpr0, $vgpr1
; VI-NEXT: {{ $}}
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
- ; VI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
- ; VI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16)
+ ; VI-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; VI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; VI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
; VI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT1]]
; VI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32)
; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]]
@@ -2076,13 +2044,13 @@ body: |
; VI-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FNEG]], [[FADD1]]
; VI-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FPEXT]]
; VI-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[FADD2]], [[INT]]
- ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -8388608
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[FMUL4]], [[C1]]
+ ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -8388608
+ ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[FMUL4]], [[C]]
; VI-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[AND]], [[FADD1]]
; VI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32)
- ; VI-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC3]](s16), [[TRUNC]](s16)
- ; VI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
- ; VI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16)
+ ; VI-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[DEF]](s16), [[DEF]](s16)
+ ; VI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; VI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
; VI-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT3]]
; VI-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32)
; VI-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]]
@@ -2093,12 +2061,12 @@ body: |
; VI-NEXT: [[FMUL8:%[0-9]+]]:_(s32) = G_FMUL [[FNEG1]], [[FADD5]]
; VI-NEXT: [[FADD6:%[0-9]+]]:_(s32) = G_FADD [[FMUL8]], [[FPEXT2]]
; VI-NEXT: [[FMUL9:%[0-9]+]]:_(s32) = G_FMUL [[FADD6]], [[INT2]]
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[FMUL9]], [[C1]]
+ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[FMUL9]], [[C]]
; VI-NEXT: [[FADD7:%[0-9]+]]:_(s32) = G_FADD [[AND1]], [[FADD5]]
; VI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD7]](s32)
- ; VI-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC4]](s16), [[TRUNC1]](s16)
- ; VI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16)
- ; VI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16)
+ ; VI-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[DEF]](s16), [[DEF]](s16)
+ ; VI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; VI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
; VI-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT5]]
; VI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT5]](s32)
; VI-NEXT: [[FMUL10:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[INT4]]
@@ -2109,10 +2077,10 @@ body: |
; VI-NEXT: [[FMUL13:%[0-9]+]]:_(s32) = G_FMUL [[FNEG2]], [[FADD9]]
; VI-NEXT: [[FADD10:%[0-9]+]]:_(s32) = G_FADD [[FMUL13]], [[FPEXT4]]
; VI-NEXT: [[FMUL14:%[0-9]+]]:_(s32) = G_FMUL [[FADD10]], [[INT4]]
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[FMUL14]], [[C1]]
+ ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[FMUL14]], [[C]]
; VI-NEXT: [[FADD11:%[0-9]+]]:_(s32) = G_FADD [[AND2]], [[FADD9]]
; VI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD11]](s32)
- ; VI-NEXT: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[TRUNC5]](s16), [[TRUNC2]](s16)
+ ; VI-NEXT: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[DEF]](s16), [[DEF]](s16)
; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16)
; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16)
; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16)
@@ -2122,25 +2090,9 @@ body: |
; GFX9-LABEL: name: test_fdiv_v3s16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
- ; GFX9-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
- ; GFX9-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; GFX9-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT1]]
; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32)
; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]]
@@ -2151,13 +2103,13 @@ body: |
; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FNEG]], [[FADD1]]
; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FPEXT]]
; GFX9-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[FADD2]], [[INT]]
- ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -8388608
- ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[FMUL4]], [[C1]]
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -8388608
+ ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[FMUL4]], [[C]]
; GFX9-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[AND]], [[FADD1]]
; GFX9-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32)
- ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC3]](s16), [[TRUNC]](s16)
- ; GFX9-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
- ; GFX9-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16)
+ ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; GFX9-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT3]]
; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32)
; GFX9-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]]
@@ -2168,12 +2120,12 @@ body: |
; GFX9-NEXT: [[FMUL8:%[0-9]+]]:_(s32) = G_FMUL [[FNEG1]], [[FADD5]]
; GFX9-NEXT: [[FADD6:%[0-9]+]]:_(s32) = G_FADD [[FMUL8]], [[FPEXT2]]
; GFX9-NEXT: [[FMUL9:%[0-9]+]]:_(s32) = G_FMUL [[FADD6]], [[INT2]]
- ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[FMUL9]], [[C1]]
+ ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[FMUL9]], [[C]]
; GFX9-NEXT: [[FADD7:%[0-9]+]]:_(s32) = G_FADD [[AND1]], [[FADD5]]
; GFX9-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD7]](s32)
- ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC4]](s16), [[TRUNC1]](s16)
- ; GFX9-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16)
- ; GFX9-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16)
+ ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; GFX9-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
; GFX9-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT5]]
; GFX9-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT5]](s32)
; GFX9-NEXT: [[FMUL10:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[INT4]]
@@ -2184,10 +2136,10 @@ body: |
; GFX9-NEXT: [[FMUL13:%[0-9]+]]:_(s32) = G_FMUL [[FNEG2]], [[FADD9]]
; GFX9-NEXT: [[FADD10:%[0-9]+]]:_(s32) = G_FADD [[FMUL13]], [[FPEXT4]]
; GFX9-NEXT: [[FMUL14:%[0-9]+]]:_(s32) = G_FMUL [[FADD10]], [[INT4]]
- ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[FMUL14]], [[C1]]
+ ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[FMUL14]], [[C]]
; GFX9-NEXT: [[FADD11:%[0-9]+]]:_(s32) = G_FADD [[AND2]], [[FADD9]]
; GFX9-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD11]](s32)
- ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[TRUNC5]](s16), [[TRUNC2]](s16)
+ ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[DEF]](s16), [[DEF]](s16)
; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16)
; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16)
; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16)
@@ -2197,25 +2149,9 @@ body: |
; GFX10-LABEL: name: test_fdiv_v3s16
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX10-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
- ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
- ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16)
+ ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT1]]
; GFX10-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32)
; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]]
@@ -2226,13 +2162,13 @@ body: |
; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FNEG]], [[FADD1]]
; GFX10-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FPEXT]]
; GFX10-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[FADD2]], [[INT]]
- ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -8388608
- ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[FMUL4]], [[C1]]
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -8388608
+ ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[FMUL4]], [[C]]
; GFX10-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[AND]], [[FADD1]]
; GFX10-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32)
- ; GFX10-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC3]](s16), [[TRUNC]](s16)
- ; GFX10-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
- ; GFX10-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16)
+ ; GFX10-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[DEF]](s16), [[DEF]](s16)
+ ; GFX10-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; GFX10-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
; GFX10-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT3]]
; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32)
; GFX10-NEXT: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]]
@@ -2243,12 +2179,12 @@ body: |
; GFX10-NEXT: [[FMUL8:%[0-9]+]]:_(s32) = G_FMUL [[FNEG1]], [[FADD5]]
; GFX10-NEXT: [[FADD6:%[0-9]+]]:_(s32) = G_FADD [[FMUL8]], [[FPEXT2]]
; GFX10-NEXT: [[FMUL9:%[0-9]+]]:_(s32) = G_FMUL [[FADD6]], [[INT2]]
- ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[FMUL9]], [[C1]]
+ ; GFX10-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[FMUL9]], [[C]]
; GFX10-NEXT: [[FADD7:%[0-9]+]]:_(s32) = G_FADD [[AND1]], [[FADD5]]
; GFX10-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD7]](s32)
- ; GFX10-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC4]](s16), [[TRUNC1]](s16)
- ; GFX10-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16)
- ; GFX10-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16)
+ ; GFX10-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[DEF]](s16), [[DEF]](s16)
+ ; GFX10-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; GFX10-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
; GFX10-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT5]]
; GFX10-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT5]](s32)
; GFX10-NEXT: [[FMUL10:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[INT4]]
@@ -2259,10 +2195,10 @@ body: |
; GFX10-NEXT: [[FMUL13:%[0-9]+]]:_(s32) = G_FMUL [[FNEG2]], [[FADD9]]
; GFX10-NEXT: [[FADD10:%[0-9]+]]:_(s32) = G_FADD [[FMUL13]], [[FPEXT4]]
; GFX10-NEXT: [[FMUL14:%[0-9]+]]:_(s32) = G_FMUL [[FADD10]], [[INT4]]
- ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[FMUL14]], [[C1]]
+ ; GFX10-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[FMUL14]], [[C]]
; GFX10-NEXT: [[FADD11:%[0-9]+]]:_(s32) = G_FADD [[AND2]], [[FADD9]]
; GFX10-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD11]](s32)
- ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[TRUNC5]](s16), [[TRUNC2]](s16)
+ ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[DEF]](s16), [[DEF]](s16)
; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16)
; GFX10-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16)
; GFX10-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16)
@@ -2272,12 +2208,9 @@ body: |
; GFX11-LABEL: name: test_fdiv_v3s16
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX11-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX11-NEXT: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX11-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16)
- ; GFX11-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV4]](s16)
+ ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX11-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; GFX11-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
; GFX11-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT1]]
; GFX11-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32)
; GFX11-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]]
@@ -2289,9 +2222,9 @@ body: |
; GFX11-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[FMUL1]], [[C]]
; GFX11-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[AND]], [[FMA1]]
; GFX11-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32)
- ; GFX11-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[UV4]](s16), [[UV]](s16)
- ; GFX11-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16)
- ; GFX11-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV5]](s16)
+ ; GFX11-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[DEF]](s16), [[DEF]](s16)
+ ; GFX11-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; GFX11-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
; GFX11-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT3]]
; GFX11-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32)
; GFX11-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]]
@@ -2302,9 +2235,9 @@ body: |
; GFX11-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[FMUL3]], [[C]]
; GFX11-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[AND1]], [[FMA4]]
; GFX11-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32)
- ; GFX11-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[UV5]](s16), [[UV1]](s16)
- ; GFX11-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16)
- ; GFX11-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[UV6]](s16)
+ ; GFX11-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[DEF]](s16), [[DEF]](s16)
+ ; GFX11-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; GFX11-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
; GFX11-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT5]]
; GFX11-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT5]](s32)
; GFX11-NEXT: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[INT4]]
@@ -2315,7 +2248,7 @@ body: |
; GFX11-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[FMUL5]], [[C]]
; GFX11-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[AND2]], [[FMA7]]
; GFX11-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32)
- ; GFX11-NEXT: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[UV6]](s16), [[UV2]](s16)
+ ; GFX11-NEXT: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[DEF]](s16), [[DEF]](s16)
; GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16)
; GFX11-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16)
; GFX11-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir
index 3289d60..9999c181 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir
@@ -17,12 +17,14 @@ body: |
; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[COPY]]
; SI-NEXT: $vgpr0 = COPY [[FFLOOR]](s32)
+ ;
; VI-LABEL: name: test_ffloor_s32
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[COPY]]
; VI-NEXT: $vgpr0 = COPY [[FFLOOR]](s32)
+ ;
; GFX9-LABEL: name: test_ffloor_s32
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -52,12 +54,14 @@ body: |
; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[SELECT]]
; SI-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[FNEG]]
; SI-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64)
+ ;
; VI-LABEL: name: test_ffloor_s64
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s64) = G_FFLOOR [[COPY]]
; VI-NEXT: $vgpr0_vgpr1 = COPY [[FFLOOR]](s64)
+ ;
; GFX9-LABEL: name: test_ffloor_s64
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -86,12 +90,14 @@ body: |
; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = nnan G_FNEG [[FMINNUM_IEEE]]
; SI-NEXT: [[FADD:%[0-9]+]]:_(s64) = nnan G_FADD [[COPY]], [[FNEG]]
; SI-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64)
+ ;
; VI-LABEL: name: test_ffloor_s64_nnan
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s64) = nnan G_FFLOOR [[COPY]]
; VI-NEXT: $vgpr0_vgpr1 = COPY [[FFLOOR]](s64)
+ ;
; GFX9-LABEL: name: test_ffloor_s64_nnan
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -122,12 +128,14 @@ body: |
; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = nsz G_FNEG [[SELECT]]
; SI-NEXT: [[FADD:%[0-9]+]]:_(s64) = nsz G_FADD [[COPY]], [[FNEG]]
; SI-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64)
+ ;
; VI-LABEL: name: test_ffloor_s64_nssaz
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s64) = nsz G_FFLOOR [[COPY]]
; VI-NEXT: $vgpr0_vgpr1 = COPY [[FFLOOR]](s64)
+ ;
; GFX9-LABEL: name: test_ffloor_s64_nssaz
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -156,6 +164,7 @@ body: |
; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR]](s32)
; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
; VI-LABEL: name: test_ffloor_s16
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
@@ -164,6 +173,7 @@ body: |
; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]]
; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16)
; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
; GFX9-LABEL: name: test_ffloor_s16
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -194,6 +204,7 @@ body: |
; SI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[UV1]]
; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FFLOOR]](s32), [[FFLOOR1]](s32)
; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
; VI-LABEL: name: test_ffloor_v2s32
; VI: liveins: $vgpr0_vgpr1
; VI-NEXT: {{ $}}
@@ -203,6 +214,7 @@ body: |
; VI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[UV1]]
; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FFLOOR]](s32), [[FFLOOR1]](s32)
; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
; GFX9-LABEL: name: test_ffloor_v2s32
; GFX9: liveins: $vgpr0_vgpr1
; GFX9-NEXT: {{ $}}
@@ -233,6 +245,7 @@ body: |
; SI-NEXT: [[FFLOOR2:%[0-9]+]]:_(s32) = G_FFLOOR [[UV2]]
; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FFLOOR]](s32), [[FFLOOR1]](s32), [[FFLOOR2]](s32)
; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+ ;
; VI-LABEL: name: test_ffloor_v3s32
; VI: liveins: $vgpr0_vgpr1_vgpr2
; VI-NEXT: {{ $}}
@@ -243,6 +256,7 @@ body: |
; VI-NEXT: [[FFLOOR2:%[0-9]+]]:_(s32) = G_FFLOOR [[UV2]]
; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FFLOOR]](s32), [[FFLOOR1]](s32), [[FFLOOR2]](s32)
; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+ ;
; GFX9-LABEL: name: test_ffloor_v3s32
; GFX9: liveins: $vgpr0_vgpr1_vgpr2
; GFX9-NEXT: {{ $}}
@@ -284,6 +298,7 @@ body: |
; SI-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[FNEG1]]
; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD]](s64), [[FADD1]](s64)
; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
; VI-LABEL: name: test_ffloor_v2s64
; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
; VI-NEXT: {{ $}}
@@ -293,6 +308,7 @@ body: |
; VI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s64) = G_FFLOOR [[UV1]]
; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FFLOOR]](s64), [[FFLOOR1]](s64)
; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
; GFX9-LABEL: name: test_ffloor_v2s64
; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
@@ -334,6 +350,7 @@ body: |
; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+ ;
; VI-LABEL: name: test_ffloor_v2s16
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
@@ -351,6 +368,7 @@ body: |
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+ ;
; GFX9-LABEL: name: test_ffloor_v2s16
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -375,22 +393,14 @@ body: |
bb.0:
; SI-LABEL: name: test_ffloor_v3s16
- ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
+ ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT]]
; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR]](s32)
- ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
+ ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
; SI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT1]]
; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR1]](s32)
- ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16)
+ ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
; SI-NEXT: [[FFLOOR2:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT2]]
; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR2]](s32)
; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
@@ -398,37 +408,23 @@ body: |
; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16)
; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32)
; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
+ ;
; VI-LABEL: name: test_ffloor_v3s16
- ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]]
- ; VI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]]
- ; VI-NEXT: [[FFLOOR2:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC2]]
+ ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[DEF]]
+ ; VI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[DEF]]
+ ; VI-NEXT: [[FFLOOR2:%[0-9]+]]:_(s16) = G_FFLOOR [[DEF]]
; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16)
; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR1]](s16)
; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR2]](s16)
; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32)
; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
+ ;
; GFX9-LABEL: name: test_ffloor_v3s16
- ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]]
- ; GFX9-NEXT: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]]
- ; GFX9-NEXT: [[FFLOOR2:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC2]]
+ ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[DEF]]
+ ; GFX9-NEXT: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[DEF]]
+ ; GFX9-NEXT: [[FFLOOR2:%[0-9]+]]:_(s16) = G_FFLOOR [[DEF]]
; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16)
; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR1]](s16)
; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR2]](s16)
@@ -484,6 +480,7 @@ body: |
; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
; VI-LABEL: name: test_ffloor_v4s16
; VI: liveins: $vgpr0_vgpr1
; VI-NEXT: {{ $}}
@@ -514,6 +511,7 @@ body: |
; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
; GFX9-LABEL: name: test_ffloor_v4s16
; GFX9: liveins: $vgpr0_vgpr1
; GFX9-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir
index fd49c993..df2ce44 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir
@@ -501,27 +501,19 @@ body: |
; SI-NEXT: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC8]](s16)
; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FPEXT6]], [[FPEXT7]], [[FPEXT8]]
; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA2]](s32)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
- ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
- ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16)
- ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C1]]
- ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]]
- ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL2]]
- ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>)
+ ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL1]]
+ ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; VI-LABEL: name: test_fma_v3s16
@@ -555,27 +547,19 @@ body: |
; VI-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC3]], [[TRUNC6]]
; VI-NEXT: [[FMA1:%[0-9]+]]:_(s16) = G_FMA [[TRUNC1]], [[TRUNC4]], [[TRUNC7]]
; VI-NEXT: [[FMA2:%[0-9]+]]:_(s16) = G_FMA [[TRUNC2]], [[TRUNC5]], [[TRUNC8]]
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
- ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
- ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMA]](s16)
; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMA1]](s16)
; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMA2]](s16)
- ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C1]]
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL2]]
- ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL1]]
+ ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX9-LABEL: name: test_fma_v3s16
@@ -585,39 +569,22 @@ body: |
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr6_vgpr7_vgpr8
; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16)
; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<6 x s16>)
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
; GFX9-NEXT: [[FMA:%[0-9]+]]:_(<2 x s16>) = G_FMA [[UV]], [[UV3]], [[UV6]]
- ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(<2 x s16>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
- ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FMA]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FMA1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32)
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32)
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[TRUNC6]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC7]](s16), [[TRUNC8]](s16)
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>)
+ ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(<2 x s16>) = G_FMA [[UV1]], [[UV4]], [[UV7]]
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[FMA]](<2 x s16>)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FMA1]](<2 x s16>)
+ ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir
index d977049..3fc2b1c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir
@@ -503,27 +503,19 @@ body: |
; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16)
; SI-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT4]], [[FPEXT5]]
; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE2]](s32)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16)
- ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
- ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
- ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]]
- ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL1]]
+ ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; VI-LABEL: name: test_fmaxnum_v3s16
@@ -555,27 +547,19 @@ body: |
; VI-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]]
; VI-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]]
; VI-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]]
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE]](s16)
; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE1]](s16)
; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE2]](s16)
- ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]]
- ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL1]]
+ ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX9-LABEL: name: test_fmaxnum_v3s16
@@ -584,39 +568,25 @@ body: |
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16)
; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV]]
; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV3]]
; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]]
- ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR]]
- ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR1]]
+ ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV1]]
+ ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV4]]
; GFX9-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]]
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32)
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16)
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE]](<2 x s16>)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE1]](<2 x s16>)
+ ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir
index 32c353d..e2b3f35 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir
@@ -503,27 +503,19 @@ body: |
; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16)
; SI-NEXT: [[FMINNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT4]], [[FPEXT5]]
; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE2]](s32)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16)
- ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
- ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
- ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]]
- ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL1]]
+ ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; VI-LABEL: name: test_fminnum_v3s16
@@ -555,27 +547,19 @@ body: |
; VI-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]]
; VI-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]]
; VI-NEXT: [[FMINNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]]
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE]](s16)
; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE1]](s16)
; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE2]](s16)
- ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]]
- ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL1]]
+ ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX9-LABEL: name: test_fminnum_v3s16
@@ -584,39 +568,25 @@ body: |
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16)
; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV]]
; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV3]]
; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]]
- ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR]]
- ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR1]]
+ ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV1]]
+ ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV4]]
; GFX9-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]]
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FMINNUM_IEEE]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FMINNUM_IEEE1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32)
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16)
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[FMINNUM_IEEE]](<2 x s16>)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FMINNUM_IEEE1]](<2 x s16>)
+ ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir
index bcc38df..33eadb8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir
@@ -425,27 +425,19 @@ body: |
; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16)
; SI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[FPEXT5]]
; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16)
- ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
- ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
- ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]]
- ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL1]]
+ ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; VI-LABEL: name: test_fmul_v3s16
@@ -471,27 +463,19 @@ body: |
; VI-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC3]]
; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[TRUNC4]]
; VI-NEXT: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[TRUNC5]]
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMUL]](s16)
; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMUL1]](s16)
; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMUL2]](s16)
- ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]]
- ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL1]]
+ ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX9PLUS-LABEL: name: test_fmul_v3s16
@@ -500,35 +484,21 @@ body: |
; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX9PLUS-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
- ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16)
; GFX9PLUS-NEXT: [[FMUL:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[UV]], [[UV3]]
- ; GFX9PLUS-NEXT: [[FMUL1:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
- ; GFX9PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FMUL]](<2 x s16>)
- ; GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9PLUS-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FMUL1]](<2 x s16>)
- ; GFX9PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
- ; GFX9PLUS-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9PLUS-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX9PLUS-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; GFX9PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32)
- ; GFX9PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; GFX9PLUS-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; GFX9PLUS-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
- ; GFX9PLUS-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32)
- ; GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9PLUS-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
- ; GFX9PLUS-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16)
- ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>)
+ ; GFX9PLUS-NEXT: [[FMUL1:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[UV1]], [[UV4]]
+ ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[FMUL]](<2 x s16>)
+ ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+ ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FMUL1]](<2 x s16>)
+ ; GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+ ; GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir
index eede021..82bd14d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir
@@ -264,65 +264,58 @@ body: |
bb.0:
; SI-LABEL: name: test_fneg_v3s16
- ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
- ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; SI-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[UV]]
+ ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
+ ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY]](s32)
+ ; SI-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BITCAST]]
; SI-NEXT: [[FNEG1:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BITCAST1]]
; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG]](<2 x s16>)
- ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32)
; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG1]](<2 x s16>)
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
- ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND1]](s32), [[LSHR]](s32), [[AND2]](s32)
+ ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]]
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C2]]
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[LSHR]](s32), [[AND1]](s32)
; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
;
; VI-LABEL: name: test_fneg_v3s16
- ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
- ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; VI-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[UV]]
+ ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
+ ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY]](s32)
+ ; VI-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BITCAST]]
; VI-NEXT: [[FNEG1:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BITCAST1]]
; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG]](<2 x s16>)
- ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32)
; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG1]](<2 x s16>)
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND1]](s32), [[LSHR]](s32), [[AND2]](s32)
+ ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]]
+ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C2]]
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[LSHR]](s32), [[AND1]](s32)
; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
;
; GFX9-LABEL: name: test_fneg_v3s16
- ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+ ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BUILD_VECTOR]]
+ ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BUILD_VECTOR1]]
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG]](<2 x s16>)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF1]](s16)
- ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[UV]]
- ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BUILD_VECTOR]]
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG1]](<2 x s16>)
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FNEG1]](<2 x s16>)
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[LSHR]](s32), [[AND1]](s32)
- ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR1]](<3 x s32>)
+ ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
+ ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[LSHR]](s32), [[AND1]](s32)
+ ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR2]](<3 x s32>)
%0:_(<3 x s16>) = G_IMPLICIT_DEF
%1:_(<3 x s16>) = G_FNEG %0
%2:_(<3 x s32>) = G_ZEXT %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpext.mir
index bd9eef0..cc3cf5d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpext.mir
@@ -105,21 +105,11 @@ body: |
; CHECK-LABEL: name: test_fpext_v4f16_to_v4f32
; CHECK: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
- ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
- ; CHECK-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16)
- ; CHECK-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; CHECK-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; CHECK-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FPEXT]](s32), [[FPEXT1]](s32), [[FPEXT2]](s32), [[FPEXT3]](s32)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
%0:_(<4 x s16>) = G_IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir
index b08f850..aab47bc 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir
@@ -395,13 +395,13 @@ body: |
bb.0:
; CHECK-LABEL: name: test_freeze_v33s32
- ; CHECK: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<32 x s32>) = G_FREEZE [[DEF]]
- ; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(s32) = G_FREEZE [[DEF1]]
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32)
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<32 x s32>) = G_FREEZE [[BUILD_VECTOR]]
+ ; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(s32) = G_FREEZE [[DEF]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FREEZE]](<32 x s32>)
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<33 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32), [[UV8]](s32), [[UV9]](s32), [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[UV14]](s32), [[UV15]](s32), [[UV16]](s32), [[UV17]](s32), [[UV18]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32), [[UV22]](s32), [[UV23]](s32), [[UV24]](s32), [[UV25]](s32), [[UV26]](s32), [[UV27]](s32), [[UV28]](s32), [[UV29]](s32), [[UV30]](s32), [[UV31]](s32), [[FREEZE1]](s32)
- ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<33 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<33 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32), [[UV8]](s32), [[UV9]](s32), [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[UV14]](s32), [[UV15]](s32), [[UV16]](s32), [[UV17]](s32), [[UV18]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32), [[UV22]](s32), [[UV23]](s32), [[UV24]](s32), [[UV25]](s32), [[UV26]](s32), [[UV27]](s32), [[UV28]](s32), [[UV29]](s32), [[UV30]](s32), [[UV31]](s32), [[FREEZE1]](s32)
+ ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR1]](<33 x s32>)
%0:_(<33 x s32>) = G_IMPLICIT_DEF
%1:_(<33 x s32>) = G_FREEZE %0
S_NOP 0, implicit %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir
index 2400362..4dfc9ed 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir
@@ -723,28 +723,22 @@ body: |
; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR7]], [[ZEXT5]](s32)
; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32)
; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC7]], [[TRUNC8]]
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
- ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C]](s32)
; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL3]]
- ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
- ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C4]]
- ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32)
+ ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C5]], [[C]](s32)
; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL4]]
- ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C4]]
- ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
- ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR9]], [[SHL5]]
- ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; SI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x s16>)
- ; SI-NEXT: $vgpr1 = COPY [[BITCAST9]](<2 x s16>)
- ; SI-NEXT: $vgpr2 = COPY [[BITCAST10]](<2 x s16>)
+ ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C5]], [[C]](s32)
+ ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C5]], [[SHL5]]
+ ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; SI-NEXT: $vgpr0 = COPY [[BITCAST6]](<2 x s16>)
+ ; SI-NEXT: $vgpr1 = COPY [[BITCAST7]](<2 x s16>)
+ ; SI-NEXT: $vgpr2 = COPY [[BITCAST8]](<2 x s16>)
;
; VI-LABEL: name: test_fshl_v3s16_v3s16
; VI: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
@@ -798,29 +792,22 @@ body: |
; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C3]](s16)
; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[LSHR7]], [[AND5]](s16)
; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL2]], [[LSHR8]]
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
- ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
- ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
- ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C4]]
- ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32)
+ ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C]](s32)
; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
- ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C4]]
- ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
- ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR9]], [[SHL5]]
- ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; VI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x s16>)
- ; VI-NEXT: $vgpr1 = COPY [[BITCAST9]](<2 x s16>)
- ; VI-NEXT: $vgpr2 = COPY [[BITCAST10]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C]](s32)
+ ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL5]]
+ ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; VI-NEXT: $vgpr0 = COPY [[BITCAST6]](<2 x s16>)
+ ; VI-NEXT: $vgpr1 = COPY [[BITCAST7]](<2 x s16>)
+ ; VI-NEXT: $vgpr2 = COPY [[BITCAST8]](<2 x s16>)
;
; GFX9-LABEL: name: test_fshl_v3s16_v3s16
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
@@ -856,17 +843,9 @@ body: |
; GFX9-NEXT: [[OR1:%[0-9]+]]:_(<2 x s16>) = disjoint G_OR [[SHL1]], [[LSHR3]]
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C3]](s32)
- ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32)
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX9-NEXT: $vgpr0 = COPY [[OR]](<2 x s16>)
; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR7]](<2 x s16>)
; GFX9-NEXT: $vgpr2 = COPY [[BUILD_VECTOR6]](<2 x s16>)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir
index 5f610924..243ad1f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir
@@ -679,28 +679,22 @@ body: |
; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT5]](s32)
; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32)
; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC7]], [[TRUNC8]]
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
- ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C]](s32)
; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL6]]
- ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
- ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C4]]
- ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32)
+ ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C5]], [[C]](s32)
; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL7]]
- ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C4]]
- ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32)
- ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR6]], [[SHL8]]
- ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; SI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x s16>)
- ; SI-NEXT: $vgpr1 = COPY [[BITCAST9]](<2 x s16>)
- ; SI-NEXT: $vgpr2 = COPY [[BITCAST10]](<2 x s16>)
+ ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[C5]], [[C]](s32)
+ ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C5]], [[SHL8]]
+ ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; SI-NEXT: $vgpr0 = COPY [[BITCAST6]](<2 x s16>)
+ ; SI-NEXT: $vgpr1 = COPY [[BITCAST7]](<2 x s16>)
+ ; SI-NEXT: $vgpr2 = COPY [[BITCAST8]](<2 x s16>)
;
; VI-LABEL: name: test_fshr_v3s16_v3s16
; VI: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
@@ -754,29 +748,22 @@ body: |
; VI-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[SHL4]], [[AND5]](s16)
; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[AND4]](s16)
; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL5]], [[LSHR5]]
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
- ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]]
- ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
- ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C4]]
- ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32)
+ ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C]](s32)
; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL7]]
- ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C4]]
- ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
- ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR6]], [[SHL8]]
- ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; VI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x s16>)
- ; VI-NEXT: $vgpr1 = COPY [[BITCAST9]](<2 x s16>)
- ; VI-NEXT: $vgpr2 = COPY [[BITCAST10]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C]](s32)
+ ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL8]]
+ ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; VI-NEXT: $vgpr0 = COPY [[BITCAST6]](<2 x s16>)
+ ; VI-NEXT: $vgpr1 = COPY [[BITCAST7]](<2 x s16>)
+ ; VI-NEXT: $vgpr2 = COPY [[BITCAST8]](<2 x s16>)
;
; GFX9-LABEL: name: test_fshr_v3s16_v3s16
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
@@ -812,17 +799,9 @@ body: |
; GFX9-NEXT: [[OR1:%[0-9]+]]:_(<2 x s16>) = disjoint G_OR [[SHL3]], [[LSHR1]]
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>)
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C3]](s32)
- ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
; GFX9-NEXT: $vgpr0 = COPY [[OR]](<2 x s16>)
; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR7]](<2 x s16>)
; GFX9-NEXT: $vgpr2 = COPY [[BUILD_VECTOR6]](<2 x s16>)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir
index 846ad4a..9e5f6b6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir
@@ -20,6 +20,7 @@ body: |
; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32)
; SI-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](s32)
; SI-NEXT: $vgpr0 = COPY [[INT1]](s32)
+ ;
; VI-LABEL: name: test_fsin_s32
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
@@ -29,6 +30,7 @@ body: |
; VI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32)
; VI-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](s32)
; VI-NEXT: $vgpr0 = COPY [[INT1]](s32)
+ ;
; GFX9-LABEL: name: test_fsin_s32
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -57,6 +59,7 @@ body: |
; SI-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s64)
; SI-NEXT: [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](s64)
; SI-NEXT: $vgpr0_vgpr1 = COPY [[INT1]](s64)
+ ;
; VI-LABEL: name: test_fsin_s64
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
@@ -66,6 +69,7 @@ body: |
; VI-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s64)
; VI-NEXT: [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](s64)
; VI-NEXT: $vgpr0_vgpr1 = COPY [[INT1]](s64)
+ ;
; GFX9-LABEL: name: test_fsin_s64
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -97,6 +101,7 @@ body: |
; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32)
; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
; VI-LABEL: name: test_fsin_s16
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
@@ -108,6 +113,7 @@ body: |
; VI-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](s16)
; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16)
; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ;
; GFX9-LABEL: name: test_fsin_s16
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -145,6 +151,7 @@ body: |
; SI-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT2]](s32)
; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT1]](s32), [[INT3]](s32)
; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
; VI-LABEL: name: test_fsin_v2s32
; VI: liveins: $vgpr0_vgpr1
; VI-NEXT: {{ $}}
@@ -159,6 +166,7 @@ body: |
; VI-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT2]](s32)
; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT1]](s32), [[INT3]](s32)
; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ;
; GFX9-LABEL: name: test_fsin_v2s32
; GFX9: liveins: $vgpr0_vgpr1
; GFX9-NEXT: {{ $}}
@@ -199,6 +207,7 @@ body: |
; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT4]](s32)
; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[INT1]](s32), [[INT3]](s32), [[INT5]](s32)
; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+ ;
; VI-LABEL: name: test_fsin_v3s32
; VI: liveins: $vgpr0_vgpr1_vgpr2
; VI-NEXT: {{ $}}
@@ -216,6 +225,7 @@ body: |
; VI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT4]](s32)
; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[INT1]](s32), [[INT3]](s32), [[INT5]](s32)
; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+ ;
; GFX9-LABEL: name: test_fsin_v3s32
; GFX9: liveins: $vgpr0_vgpr1_vgpr2
; GFX9-NEXT: {{ $}}
@@ -255,6 +265,7 @@ body: |
; SI-NEXT: [[INT3:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT2]](s64)
; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[INT1]](s64), [[INT3]](s64)
; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
; VI-LABEL: name: test_fsin_v2s64
; VI: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
; VI-NEXT: {{ $}}
@@ -269,6 +280,7 @@ body: |
; VI-NEXT: [[INT3:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT2]](s64)
; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[INT1]](s64), [[INT3]](s64)
; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ;
; GFX9-LABEL: name: test_fsin_v2s64
; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX9-NEXT: {{ $}}
@@ -318,6 +330,7 @@ body: |
; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+ ;
; VI-LABEL: name: test_fsin_v2s16
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
@@ -340,6 +353,7 @@ body: |
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+ ;
; GFX9-LABEL: name: test_fsin_v2s16
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
@@ -367,28 +381,20 @@ body: |
bb.0:
; SI-LABEL: name: test_fsin_v3s16
- ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
- ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000
- ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[C1]]
+ ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FC45F3060000000
+ ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[C]]
; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32)
; SI-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](s32)
; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32)
- ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
- ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT1]], [[C1]]
+ ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT1]], [[C]]
; SI-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](s32)
; SI-NEXT: [[INT3:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT2]](s32)
; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT3]](s32)
- ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16)
- ; SI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[C1]]
+ ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
+ ; SI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[C]]
; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](s32)
; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT4]](s32)
; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT5]](s32)
@@ -397,24 +403,17 @@ body: |
; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16)
; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32)
; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
+ ;
; VI-LABEL: name: test_fsin_v3s16
- ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3118
- ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[C1]]
+ ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3118
+ ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[DEF]], [[C]]
; VI-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s16)
; VI-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](s16)
- ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[C1]]
+ ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[DEF]], [[C]]
; VI-NEXT: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL1]](s16)
; VI-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT2]](s16)
- ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[C1]]
+ ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[DEF]], [[C]]
; VI-NEXT: [[INT4:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL2]](s16)
; VI-NEXT: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT4]](s16)
; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16)
@@ -422,22 +421,15 @@ body: |
; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16)
; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32)
; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
+ ;
; GFX9-LABEL: name: test_fsin_v3s16
- ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3118
- ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[C1]]
+ ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3118
+ ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[DEF]], [[C]]
; GFX9-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL]](s16)
- ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[C1]]
+ ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[DEF]], [[C]]
; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL1]](s16)
- ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[C1]]
+ ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[DEF]], [[C]]
; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[FMUL2]](s16)
; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16)
; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16)
@@ -503,6 +495,7 @@ body: |
; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
; VI-LABEL: name: test_fsin_v4s16
; VI: liveins: $vgpr0_vgpr1
; VI-NEXT: {{ $}}
@@ -542,6 +535,7 @@ body: |
; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+ ;
; GFX9-LABEL: name: test_fsin_v4s16
; GFX9: liveins: $vgpr0_vgpr1
; GFX9-NEXT: {{ $}}
@@ -589,6 +583,7 @@ body: |
; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32)
; SI-NEXT: [[INT1:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](s32)
; SI-NEXT: $vgpr0 = COPY [[INT1]](s32)
+ ;
; VI-LABEL: name: test_fsin_s32_flags
; VI: liveins: $vgpr0
; VI-NEXT: {{ $}}
@@ -598,6 +593,7 @@ body: |
; VI-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[FMUL]](s32)
; VI-NEXT: [[INT1:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.sin), [[INT]](s32)
; VI-NEXT: $vgpr0 = COPY [[INT1]](s32)
+ ;
; GFX9-LABEL: name: test_fsin_s32_flags
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir
index 4089962..0ed4865 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir
@@ -440,22 +440,14 @@ body: |
bb.0:
; SI-LABEL: name: test_fsqrt_v3s16
- ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
+ ; SI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
; SI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[FPEXT]](s32)
; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT]](s32)
- ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
+ ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
; SI-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[FPEXT1]](s32)
; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT1]](s32)
- ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16)
+ ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[DEF]](s16)
; SI-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sqrt), [[FPEXT2]](s32)
; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT2]](s32)
; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
@@ -465,18 +457,10 @@ body: |
; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
;
; VI-LABEL: name: test_fsqrt_v3s16
- ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; VI-NEXT: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]]
- ; VI-NEXT: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]]
- ; VI-NEXT: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC2]]
+ ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; VI-NEXT: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[DEF]]
+ ; VI-NEXT: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[DEF]]
+ ; VI-NEXT: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[DEF]]
; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16)
; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT1]](s16)
; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT2]](s16)
@@ -484,18 +468,10 @@ body: |
; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
;
; GFX9-LABEL: name: test_fsqrt_v3s16
- ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9-NEXT: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]]
- ; GFX9-NEXT: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]]
- ; GFX9-NEXT: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC2]]
+ ; GFX9: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[DEF]]
+ ; GFX9-NEXT: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[DEF]]
+ ; GFX9-NEXT: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[DEF]]
; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16)
; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT1]](s16)
; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT2]](s16)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir
index 667d79f..b9702d2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir
@@ -478,27 +478,19 @@ body: |
; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG2]](s16)
; SI-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT4]], [[FPEXT5]]
; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16)
; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16)
- ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
- ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
- ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]]
- ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL1]]
+ ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; VI-LABEL: name: test_fsub_v3s16
@@ -524,27 +516,19 @@ body: |
; VI-NEXT: [[FSUB:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC]], [[TRUNC3]]
; VI-NEXT: [[FSUB1:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC1]], [[TRUNC4]]
; VI-NEXT: [[FSUB2:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC2]], [[TRUNC5]]
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FSUB]](s16)
; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FSUB1]](s16)
; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FSUB2]](s16)
- ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]]
- ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL1]]
+ ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX9-LABEL: name: test_fsub_v3s16
@@ -570,17 +554,10 @@ body: |
; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC]], [[TRUNC3]]
; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC1]], [[TRUNC4]]
; GFX9-NEXT: [[FSUB2:%[0-9]+]]:_(s16) = G_FSUB [[TRUNC2]], [[TRUNC5]]
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32)
- ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
- ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSUB]](s16), [[FSUB1]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSUB2]](s16), [[TRUNC6]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC7]](s16), [[TRUNC8]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FSUB2]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir
index 2d54d7c..2d5bf68 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir
@@ -300,13 +300,12 @@ body: |
; GFX7-LABEL: name: test_icmp_v3s32
; GFX7: liveins: $vgpr0_vgpr1_vgpr2
; GFX7-NEXT: {{ $}}
- ; GFX7-NEXT: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+ ; GFX7-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX7-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
- ; GFX7-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<3 x s32>)
- ; GFX7-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
- ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV3]]
- ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV4]]
- ; GFX7-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV5]]
+ ; GFX7-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+ ; GFX7-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[DEF]](s32), [[UV]]
+ ; GFX7-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[DEF]](s32), [[UV1]]
+ ; GFX7-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[DEF]](s32), [[UV2]]
; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
; GFX7-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
; GFX7-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1)
@@ -320,13 +319,12 @@ body: |
; GFX8-LABEL: name: test_icmp_v3s32
; GFX8: liveins: $vgpr0_vgpr1_vgpr2
; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+ ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
- ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<3 x s32>)
- ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
- ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV3]]
- ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV4]]
- ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV5]]
+ ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+ ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[DEF]](s32), [[UV]]
+ ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[DEF]](s32), [[UV1]]
+ ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[DEF]](s32), [[UV2]]
; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1)
@@ -340,13 +338,12 @@ body: |
; GFX9-LABEL: name: test_icmp_v3s32
; GFX9: liveins: $vgpr0_vgpr1_vgpr2
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<3 x s32>)
- ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
- ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV3]]
- ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV4]]
- ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV5]]
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+ ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[DEF]](s32), [[UV]]
+ ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[DEF]](s32), [[UV1]]
+ ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[DEF]](s32), [[UV2]]
; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def-s1025.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def-s1025.mir
index 1e6f1e7..3f0161f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def-s1025.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def-s1025.mir
@@ -7,13 +7,198 @@ name: test_implicit_def_s1025
body: |
bb.0:
; TAHITI-LABEL: name: test_implicit_def_s1025
- ; TAHITI: [[DEF:%[0-9]+]]:_(s1024) = G_IMPLICIT_DEF
- ; TAHITI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s1024)
- ; TAHITI-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; TAHITI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; TAHITI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; TAHITI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; TAHITI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+ ; TAHITI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; TAHITI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C2]](s32)
+ ; TAHITI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+ ; TAHITI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+ ; TAHITI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C3]](s32)
+ ; TAHITI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+ ; TAHITI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+ ; TAHITI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C4]](s32)
+ ; TAHITI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL3]]
+ ; TAHITI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+ ; TAHITI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C5]](s32)
+ ; TAHITI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+ ; TAHITI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+ ; TAHITI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C6]](s32)
+ ; TAHITI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+ ; TAHITI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+ ; TAHITI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C7]](s32)
+ ; TAHITI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL6]]
+ ; TAHITI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; TAHITI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C8]](s32)
+ ; TAHITI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+ ; TAHITI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
+ ; TAHITI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C9]](s32)
+ ; TAHITI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+ ; TAHITI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; TAHITI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C10]](s32)
+ ; TAHITI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[OR8]], [[SHL9]]
+ ; TAHITI-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
+ ; TAHITI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C11]](s32)
+ ; TAHITI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+ ; TAHITI-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+ ; TAHITI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C12]](s32)
+ ; TAHITI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+ ; TAHITI-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
+ ; TAHITI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C13]](s32)
+ ; TAHITI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[OR11]], [[SHL12]]
+ ; TAHITI-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
+ ; TAHITI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C14]](s32)
+ ; TAHITI-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
+ ; TAHITI-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+ ; TAHITI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C15]](s32)
+ ; TAHITI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
+ ; TAHITI-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; TAHITI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C16]](s32)
+ ; TAHITI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[OR14]], [[SHL15]]
+ ; TAHITI-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 17
+ ; TAHITI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C17]](s32)
+ ; TAHITI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
+ ; TAHITI-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 18
+ ; TAHITI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C18]](s32)
+ ; TAHITI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
+ ; TAHITI-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 19
+ ; TAHITI-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C19]](s32)
+ ; TAHITI-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[OR17]], [[SHL18]]
+ ; TAHITI-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; TAHITI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C20]](s32)
+ ; TAHITI-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]]
+ ; TAHITI-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 21
+ ; TAHITI-NEXT: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C21]](s32)
+ ; TAHITI-NEXT: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]]
+ ; TAHITI-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 22
+ ; TAHITI-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C22]](s32)
+ ; TAHITI-NEXT: [[OR21:%[0-9]+]]:_(s32) = G_OR [[OR20]], [[SHL21]]
+ ; TAHITI-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
+ ; TAHITI-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C23]](s32)
+ ; TAHITI-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]]
+ ; TAHITI-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; TAHITI-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C24]](s32)
+ ; TAHITI-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]]
+ ; TAHITI-NEXT: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 25
+ ; TAHITI-NEXT: [[SHL24:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C25]](s32)
+ ; TAHITI-NEXT: [[OR24:%[0-9]+]]:_(s32) = G_OR [[OR23]], [[SHL24]]
+ ; TAHITI-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 26
+ ; TAHITI-NEXT: [[SHL25:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C26]](s32)
+ ; TAHITI-NEXT: [[OR25:%[0-9]+]]:_(s32) = G_OR [[OR24]], [[SHL25]]
+ ; TAHITI-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 27
+ ; TAHITI-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C27]](s32)
+ ; TAHITI-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[OR25]], [[SHL26]]
+ ; TAHITI-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+ ; TAHITI-NEXT: [[SHL27:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C28]](s32)
+ ; TAHITI-NEXT: [[OR27:%[0-9]+]]:_(s32) = G_OR [[OR26]], [[SHL27]]
+ ; TAHITI-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 29
+ ; TAHITI-NEXT: [[SHL28:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C29]](s32)
+ ; TAHITI-NEXT: [[OR28:%[0-9]+]]:_(s32) = G_OR [[OR27]], [[SHL28]]
+ ; TAHITI-NEXT: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 30
+ ; TAHITI-NEXT: [[SHL29:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C30]](s32)
+ ; TAHITI-NEXT: [[OR29:%[0-9]+]]:_(s32) = G_OR [[OR28]], [[SHL29]]
+ ; TAHITI-NEXT: [[C31:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+ ; TAHITI-NEXT: [[SHL30:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C31]](s32)
+ ; TAHITI-NEXT: [[OR30:%[0-9]+]]:_(s32) = G_OR [[OR29]], [[SHL30]]
+ ; TAHITI-NEXT: $vgpr0 = COPY [[OR30]](s32)
+ ;
; FIJI-LABEL: name: test_implicit_def_s1025
- ; FIJI: [[DEF:%[0-9]+]]:_(s1024) = G_IMPLICIT_DEF
- ; FIJI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s1024)
- ; FIJI-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; FIJI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; FIJI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; FIJI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; FIJI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+ ; FIJI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; FIJI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C2]](s32)
+ ; FIJI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+ ; FIJI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+ ; FIJI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C3]](s32)
+ ; FIJI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+ ; FIJI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+ ; FIJI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C4]](s32)
+ ; FIJI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL3]]
+ ; FIJI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+ ; FIJI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C5]](s32)
+ ; FIJI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+ ; FIJI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+ ; FIJI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C6]](s32)
+ ; FIJI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+ ; FIJI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+ ; FIJI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C7]](s32)
+ ; FIJI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL6]]
+ ; FIJI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; FIJI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C8]](s32)
+ ; FIJI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+ ; FIJI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
+ ; FIJI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C9]](s32)
+ ; FIJI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]]
+ ; FIJI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; FIJI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C10]](s32)
+ ; FIJI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[OR8]], [[SHL9]]
+ ; FIJI-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
+ ; FIJI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C11]](s32)
+ ; FIJI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]]
+ ; FIJI-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+ ; FIJI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C12]](s32)
+ ; FIJI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]]
+ ; FIJI-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
+ ; FIJI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C13]](s32)
+ ; FIJI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[OR11]], [[SHL12]]
+ ; FIJI-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
+ ; FIJI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C14]](s32)
+ ; FIJI-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]]
+ ; FIJI-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+ ; FIJI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C15]](s32)
+ ; FIJI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]]
+ ; FIJI-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; FIJI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C16]](s32)
+ ; FIJI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[OR14]], [[SHL15]]
+ ; FIJI-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 17
+ ; FIJI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C17]](s32)
+ ; FIJI-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]]
+ ; FIJI-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 18
+ ; FIJI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C18]](s32)
+ ; FIJI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]]
+ ; FIJI-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 19
+ ; FIJI-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C19]](s32)
+ ; FIJI-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[OR17]], [[SHL18]]
+ ; FIJI-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; FIJI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C20]](s32)
+ ; FIJI-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]]
+ ; FIJI-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 21
+ ; FIJI-NEXT: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C21]](s32)
+ ; FIJI-NEXT: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]]
+ ; FIJI-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 22
+ ; FIJI-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C22]](s32)
+ ; FIJI-NEXT: [[OR21:%[0-9]+]]:_(s32) = G_OR [[OR20]], [[SHL21]]
+ ; FIJI-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
+ ; FIJI-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C23]](s32)
+ ; FIJI-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]]
+ ; FIJI-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; FIJI-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C24]](s32)
+ ; FIJI-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]]
+ ; FIJI-NEXT: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 25
+ ; FIJI-NEXT: [[SHL24:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C25]](s32)
+ ; FIJI-NEXT: [[OR24:%[0-9]+]]:_(s32) = G_OR [[OR23]], [[SHL24]]
+ ; FIJI-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 26
+ ; FIJI-NEXT: [[SHL25:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C26]](s32)
+ ; FIJI-NEXT: [[OR25:%[0-9]+]]:_(s32) = G_OR [[OR24]], [[SHL25]]
+ ; FIJI-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 27
+ ; FIJI-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C27]](s32)
+ ; FIJI-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[OR25]], [[SHL26]]
+ ; FIJI-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+ ; FIJI-NEXT: [[SHL27:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C28]](s32)
+ ; FIJI-NEXT: [[OR27:%[0-9]+]]:_(s32) = G_OR [[OR26]], [[SHL27]]
+ ; FIJI-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 29
+ ; FIJI-NEXT: [[SHL28:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C29]](s32)
+ ; FIJI-NEXT: [[OR28:%[0-9]+]]:_(s32) = G_OR [[OR27]], [[SHL28]]
+ ; FIJI-NEXT: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 30
+ ; FIJI-NEXT: [[SHL29:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C30]](s32)
+ ; FIJI-NEXT: [[OR29:%[0-9]+]]:_(s32) = G_OR [[OR28]], [[SHL29]]
+ ; FIJI-NEXT: [[C31:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+ ; FIJI-NEXT: [[SHL30:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C31]](s32)
+ ; FIJI-NEXT: [[OR30:%[0-9]+]]:_(s32) = G_OR [[OR29]], [[SHL30]]
+ ; FIJI-NEXT: $vgpr0 = COPY [[OR30]](s32)
%0:_(s1025) = G_IMPLICIT_DEF
%1:_(s32) = G_TRUNC %0
$vgpr0 = COPY %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir
index 8113ebf..4e4ab1b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir
@@ -150,9 +150,8 @@ body: |
bb.0:
; CHECK-LABEL: name: test_implicit_def_s512
- ; CHECK: [[DEF:%[0-9]+]]:_(s512) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](s512), 0
- ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32)
%0:_(s512) = G_IMPLICIT_DEF
%1:_(s32) = G_EXTRACT %0, 0
$vgpr0 = COPY %1
@@ -164,9 +163,8 @@ body: |
bb.0:
; CHECK-LABEL: name: test_implicit_def_s1024
- ; CHECK: [[DEF:%[0-9]+]]:_(s1024) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](s1024), 0
- ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32)
%0:_(s1024) = G_IMPLICIT_DEF
%1:_(s32) = G_EXTRACT %0, 0
$vgpr0 = COPY %1
@@ -178,9 +176,8 @@ body: |
bb.0:
; CHECK-LABEL: name: test_implicit_def_s1056
- ; CHECK: [[DEF:%[0-9]+]]:_(s1024) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s1024)
- ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32)
%0:_(s1056) = G_IMPLICIT_DEF
%1:_(s32) = G_TRUNC %0
$vgpr0 = COPY %1
@@ -317,12 +314,10 @@ body: |
; CHECK-LABEL: name: test_implicit_def_v33s32
; CHECK: liveins: $vgpr0_vgpr1
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; CHECK-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (volatile store (s32), addrspace 1)
- ; CHECK-NEXT: G_STORE [[DEF1]](s32), [[COPY]](p1) :: (volatile store (s32), addrspace 1)
+ ; CHECK-NEXT: G_STORE [[DEF]](s32), [[COPY]](p1) :: (volatile store (s32), addrspace 1)
+ ; CHECK-NEXT: G_STORE [[DEF]](s32), [[COPY]](p1) :: (volatile store (s32), addrspace 1)
%0:_(<33 x s32>) = G_IMPLICIT_DEF
%1:_(s32), %2:_(s32), %3:_(s32), %4:_(s32), %5:_(s32), %6:_(s32), %7:_(s32), %8:_(s32), %9:_(s32), %10:_(s32), %11:_(s32), %12:_(s32), %13:_(s32), %14:_(s32), %15:_(s32), %16:_(s32), %17:_(s32), %18:_(s32), %19:_(s32), %20:_(s32), %21:_(s32), %22:_(s32), %23:_(s32), %24:_(s32), %25:_(s32), %26:_(s32), %27:_(s32), %28:_(s32), %29:_(s32), %30:_(s32), %31:_(s32), %32:_(s32), %33:_(s32) = G_UNMERGE_VALUES %0
%34:_(p1) = COPY $vgpr0_vgpr1
@@ -390,9 +385,8 @@ body: |
bb.0:
; CHECK-LABEL: name: test_implicit_def_v3s8
- ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32), [[DEF]](s32)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
%0:_(<3 x s8>) = G_IMPLICIT_DEF
%1:_(<3 x s32>) = G_ANYEXT %0
@@ -417,26 +411,15 @@ body: |
bb.0:
; CHECK-LABEL: name: test_implicit_def_v3s16
- ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32)
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]]
- ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
%0:_(<3 x s16>) = G_IMPLICIT_DEF
%1:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -462,32 +445,21 @@ body: |
bb.0:
; CHECK-LABEL: name: test_implicit_def_v5s16
- ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<8 x s16>)
- ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32)
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]]
- ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32)
- ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]]
- ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[UV6]](<2 x s16>)
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL2]]
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL3]]
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>)
%0:_(<5 x s16>) = G_IMPLICIT_DEF
%1:_(<8 x s16>) = G_IMPLICIT_DEF
@@ -501,11 +473,21 @@ body: |
bb.0:
; CHECK-LABEL: name: test_implicit_def_v6s16
- ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<8 x s16>)
- ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[UV4]](<2 x s16>), [[UV5]](<2 x s16>), [[UV6]](<2 x s16>), [[UV3]](<2 x s16>)
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL2]]
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL3]]
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>)
%0:_(<6 x s16>) = G_IMPLICIT_DEF
%1:_(<8 x s16>) = G_IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
index 1bc7cd0..9b9c6d4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
@@ -122,9 +122,8 @@ body: |
; CHECK: liveins: $vgpr0_vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x s64>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64), [[UV12:%[0-9]+]]:_(s64), [[UV13:%[0-9]+]]:_(s64), [[UV14:%[0-9]+]]:_(s64), [[UV15:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<16 x s64>)
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[UV1]](s64), [[UV2]](s64), [[UV3]](s64), [[UV4]](s64), [[UV5]](s64), [[UV6]](s64), [[UV7]](s64), [[UV8]](s64), [[UV9]](s64), [[UV10]](s64), [[UV11]](s64), [[UV12]](s64), [[UV13]](s64), [[UV14]](s64), [[UV15]](s64)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64)
; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<16 x s64>)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(<16 x s64>) = G_IMPLICIT_DEF
@@ -144,9 +143,8 @@ body: |
; CHECK: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[UV1]](s32)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[DEF]](s32)
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
%0:_(s32) = COPY $vgpr0
%1:_(s8) = G_TRUNC %0
@@ -190,90 +188,86 @@ body: |
; CHECK-LABEL: name: insert_vector_elt_64_65_v64s32
; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1, $vgpr2_vgpr3
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr2_vgpr3
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>), [[UV2:%[0-9]+]]:_(<4 x s32>), [[UV3:%[0-9]+]]:_(<4 x s32>), [[UV4:%[0-9]+]]:_(<4 x s32>), [[UV5:%[0-9]+]]:_(<4 x s32>), [[UV6:%[0-9]+]]:_(<4 x s32>), [[UV7:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>)
- ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s32>), [[UV9:%[0-9]+]]:_(<4 x s32>), [[UV10:%[0-9]+]]:_(<4 x s32>), [[UV11:%[0-9]+]]:_(<4 x s32>), [[UV12:%[0-9]+]]:_(<4 x s32>), [[UV13:%[0-9]+]]:_(<4 x s32>), [[UV14:%[0-9]+]]:_(<4 x s32>), [[UV15:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>)
- ; CHECK-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
- ; CHECK-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
- ; CHECK-NEXT: G_STORE [[UV2]](<4 x s32>), [[PTR_ADD1]](p1) :: (store (<4 x s32>) into unknown-address + 32, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD1]](p1) :: (store (<4 x s32>) into unknown-address + 32, align 4, addrspace 1)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C2]](s64)
- ; CHECK-NEXT: G_STORE [[UV3]](<4 x s32>), [[PTR_ADD2]](p1) :: (store (<4 x s32>) into unknown-address + 48, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD2]](p1) :: (store (<4 x s32>) into unknown-address + 48, align 4, addrspace 1)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64)
- ; CHECK-NEXT: G_STORE [[UV4]](<4 x s32>), [[PTR_ADD3]](p1) :: (store (<4 x s32>) into unknown-address + 64, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD3]](p1) :: (store (<4 x s32>) into unknown-address + 64, align 4, addrspace 1)
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 80
; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C4]](s64)
- ; CHECK-NEXT: G_STORE [[UV5]](<4 x s32>), [[PTR_ADD4]](p1) :: (store (<4 x s32>) into unknown-address + 80, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD4]](p1) :: (store (<4 x s32>) into unknown-address + 80, align 4, addrspace 1)
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 96
; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C5]](s64)
- ; CHECK-NEXT: G_STORE [[UV6]](<4 x s32>), [[PTR_ADD5]](p1) :: (store (<4 x s32>) into unknown-address + 96, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD5]](p1) :: (store (<4 x s32>) into unknown-address + 96, align 4, addrspace 1)
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 112
; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C6]](s64)
- ; CHECK-NEXT: G_STORE [[UV7]](<4 x s32>), [[PTR_ADD6]](p1) :: (store (<4 x s32>) into unknown-address + 112, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD6]](p1) :: (store (<4 x s32>) into unknown-address + 112, align 4, addrspace 1)
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 128
; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C7]](s64)
- ; CHECK-NEXT: G_STORE [[UV8]](<4 x s32>), [[PTR_ADD7]](p1) :: (store (<4 x s32>) into unknown-address + 128, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD7]](p1) :: (store (<4 x s32>) into unknown-address + 128, align 4, addrspace 1)
; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 144
; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C8]](s64)
- ; CHECK-NEXT: G_STORE [[UV9]](<4 x s32>), [[PTR_ADD8]](p1) :: (store (<4 x s32>) into unknown-address + 144, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD8]](p1) :: (store (<4 x s32>) into unknown-address + 144, align 4, addrspace 1)
; CHECK-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 160
; CHECK-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C9]](s64)
- ; CHECK-NEXT: G_STORE [[UV10]](<4 x s32>), [[PTR_ADD9]](p1) :: (store (<4 x s32>) into unknown-address + 160, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD9]](p1) :: (store (<4 x s32>) into unknown-address + 160, align 4, addrspace 1)
; CHECK-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 176
; CHECK-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C10]](s64)
- ; CHECK-NEXT: G_STORE [[UV11]](<4 x s32>), [[PTR_ADD10]](p1) :: (store (<4 x s32>) into unknown-address + 176, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD10]](p1) :: (store (<4 x s32>) into unknown-address + 176, align 4, addrspace 1)
; CHECK-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 192
; CHECK-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C11]](s64)
- ; CHECK-NEXT: G_STORE [[UV12]](<4 x s32>), [[PTR_ADD11]](p1) :: (store (<4 x s32>) into unknown-address + 192, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD11]](p1) :: (store (<4 x s32>) into unknown-address + 192, align 4, addrspace 1)
; CHECK-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 208
; CHECK-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C12]](s64)
- ; CHECK-NEXT: G_STORE [[UV13]](<4 x s32>), [[PTR_ADD12]](p1) :: (store (<4 x s32>) into unknown-address + 208, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD12]](p1) :: (store (<4 x s32>) into unknown-address + 208, align 4, addrspace 1)
; CHECK-NEXT: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 224
; CHECK-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C13]](s64)
- ; CHECK-NEXT: G_STORE [[UV14]](<4 x s32>), [[PTR_ADD13]](p1) :: (store (<4 x s32>) into unknown-address + 224, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD13]](p1) :: (store (<4 x s32>) into unknown-address + 224, align 4, addrspace 1)
; CHECK-NEXT: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 240
; CHECK-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C14]](s64)
- ; CHECK-NEXT: G_STORE [[UV15]](<4 x s32>), [[PTR_ADD14]](p1) :: (store (<4 x s32>) into unknown-address + 240, align 4, addrspace 1)
- ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(<4 x s32>), [[UV17:%[0-9]+]]:_(<4 x s32>), [[UV18:%[0-9]+]]:_(<4 x s32>), [[UV19:%[0-9]+]]:_(<4 x s32>), [[UV20:%[0-9]+]]:_(<4 x s32>), [[UV21:%[0-9]+]]:_(<4 x s32>), [[UV22:%[0-9]+]]:_(<4 x s32>), [[UV23:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>)
- ; CHECK-NEXT: [[UV24:%[0-9]+]]:_(<4 x s32>), [[UV25:%[0-9]+]]:_(<4 x s32>), [[UV26:%[0-9]+]]:_(<4 x s32>), [[UV27:%[0-9]+]]:_(<4 x s32>), [[UV28:%[0-9]+]]:_(<4 x s32>), [[UV29:%[0-9]+]]:_(<4 x s32>), [[UV30:%[0-9]+]]:_(<4 x s32>), [[UV31:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>)
- ; CHECK-NEXT: G_STORE [[UV16]](<4 x s32>), [[COPY1]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD14]](p1) :: (store (<4 x s32>) into unknown-address + 240, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[COPY1]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
; CHECK-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY1]], [[C]](s64)
- ; CHECK-NEXT: G_STORE [[UV17]](<4 x s32>), [[PTR_ADD15]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD15]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1)
; CHECK-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY1]], [[C1]](s64)
- ; CHECK-NEXT: G_STORE [[UV18]](<4 x s32>), [[PTR_ADD16]](p1) :: (store (<4 x s32>) into unknown-address + 32, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD16]](p1) :: (store (<4 x s32>) into unknown-address + 32, align 4, addrspace 1)
; CHECK-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY1]], [[C2]](s64)
- ; CHECK-NEXT: G_STORE [[UV19]](<4 x s32>), [[PTR_ADD17]](p1) :: (store (<4 x s32>) into unknown-address + 48, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD17]](p1) :: (store (<4 x s32>) into unknown-address + 48, align 4, addrspace 1)
; CHECK-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY1]], [[C3]](s64)
- ; CHECK-NEXT: G_STORE [[UV20]](<4 x s32>), [[PTR_ADD18]](p1) :: (store (<4 x s32>) into unknown-address + 64, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD18]](p1) :: (store (<4 x s32>) into unknown-address + 64, align 4, addrspace 1)
; CHECK-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY1]], [[C4]](s64)
- ; CHECK-NEXT: G_STORE [[UV21]](<4 x s32>), [[PTR_ADD19]](p1) :: (store (<4 x s32>) into unknown-address + 80, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD19]](p1) :: (store (<4 x s32>) into unknown-address + 80, align 4, addrspace 1)
; CHECK-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY1]], [[C5]](s64)
- ; CHECK-NEXT: G_STORE [[UV22]](<4 x s32>), [[PTR_ADD20]](p1) :: (store (<4 x s32>) into unknown-address + 96, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD20]](p1) :: (store (<4 x s32>) into unknown-address + 96, align 4, addrspace 1)
; CHECK-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY1]], [[C6]](s64)
- ; CHECK-NEXT: G_STORE [[UV23]](<4 x s32>), [[PTR_ADD21]](p1) :: (store (<4 x s32>) into unknown-address + 112, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD21]](p1) :: (store (<4 x s32>) into unknown-address + 112, align 4, addrspace 1)
; CHECK-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY1]], [[C7]](s64)
- ; CHECK-NEXT: G_STORE [[UV24]](<4 x s32>), [[PTR_ADD22]](p1) :: (store (<4 x s32>) into unknown-address + 128, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD22]](p1) :: (store (<4 x s32>) into unknown-address + 128, align 4, addrspace 1)
; CHECK-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY1]], [[C8]](s64)
- ; CHECK-NEXT: G_STORE [[UV25]](<4 x s32>), [[PTR_ADD23]](p1) :: (store (<4 x s32>) into unknown-address + 144, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD23]](p1) :: (store (<4 x s32>) into unknown-address + 144, align 4, addrspace 1)
; CHECK-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY1]], [[C9]](s64)
- ; CHECK-NEXT: G_STORE [[UV26]](<4 x s32>), [[PTR_ADD24]](p1) :: (store (<4 x s32>) into unknown-address + 160, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD24]](p1) :: (store (<4 x s32>) into unknown-address + 160, align 4, addrspace 1)
; CHECK-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY1]], [[C10]](s64)
- ; CHECK-NEXT: G_STORE [[UV27]](<4 x s32>), [[PTR_ADD25]](p1) :: (store (<4 x s32>) into unknown-address + 176, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD25]](p1) :: (store (<4 x s32>) into unknown-address + 176, align 4, addrspace 1)
; CHECK-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY1]], [[C11]](s64)
- ; CHECK-NEXT: G_STORE [[UV28]](<4 x s32>), [[PTR_ADD26]](p1) :: (store (<4 x s32>) into unknown-address + 192, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD26]](p1) :: (store (<4 x s32>) into unknown-address + 192, align 4, addrspace 1)
; CHECK-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY1]], [[C12]](s64)
- ; CHECK-NEXT: G_STORE [[UV29]](<4 x s32>), [[PTR_ADD27]](p1) :: (store (<4 x s32>) into unknown-address + 208, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD27]](p1) :: (store (<4 x s32>) into unknown-address + 208, align 4, addrspace 1)
; CHECK-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY1]], [[C13]](s64)
- ; CHECK-NEXT: G_STORE [[UV30]](<4 x s32>), [[PTR_ADD28]](p1) :: (store (<4 x s32>) into unknown-address + 224, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD28]](p1) :: (store (<4 x s32>) into unknown-address + 224, align 4, addrspace 1)
; CHECK-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY1]], [[C14]](s64)
- ; CHECK-NEXT: G_STORE [[UV31]](<4 x s32>), [[PTR_ADD29]](p1) :: (store (<4 x s32>) into unknown-address + 240, align 4, addrspace 1)
+ ; CHECK-NEXT: G_STORE [[DEF]](<4 x s32>), [[PTR_ADD29]](p1) :: (store (<4 x s32>) into unknown-address + 240, align 4, addrspace 1)
%0:_(p1) = COPY $sgpr0_sgpr1
%1:_(s32) = G_CONSTANT i32 64
%2:_(<64 x s32>) = G_LOAD %0 :: (load (<64 x s32>), align 4, addrspace 4)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir
index d8d0f9b..495740be 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir
@@ -968,22 +968,13 @@ body: |
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32)
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]]
- ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[UV1]](<2 x s16>)
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
%1:_(<3 x s16>) = G_EXTRACT %0, 0
@@ -1006,24 +997,15 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32)
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[UV1]](<2 x s16>)
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
%1:_(<3 x s16>) = G_EXTRACT %0, 0
@@ -1046,17 +1028,14 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST1]](<2 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST]](<2 x s16>)
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
%1:_(<3 x s16>) = G_EXTRACT %0, 0
@@ -1078,25 +1057,16 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32)
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]]
- ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[UV1]](<2 x s16>)
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
%1:_(<3 x s16>) = G_EXTRACT %0, 0
@@ -1122,20 +1092,17 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]]
- ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
%1:_(<3 x s16>) = G_EXTRACT %0, 0
@@ -1156,24 +1123,15 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32)
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]]
- ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[UV1]](<2 x s16>)
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
%1:_(<3 x s16>) = G_EXTRACT %0, 0
@@ -1198,20 +1156,17 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]]
- ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
%1:_(<3 x s16>) = G_EXTRACT %0, 0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir
index 7426ff9..4ebf883 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir
@@ -795,27 +795,19 @@ body: |
; GFX6-NEXT: [[FPEXT20:%[0-9]+]]:_(s32) = G_FPEXT %25(s16)
; GFX6-NEXT: [[FADD5:%[0-9]+]]:_(s32) = G_FADD [[FPEXT19]], [[FPEXT20]]
; GFX6-NEXT: [[FPTRUNC8:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD5]](s32)
- ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16)
; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC5]](s16)
; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC8]](s16)
- ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; GFX6-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C6]]
- ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32)
+ ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C]](s32)
; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; GFX6-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C6]]
- ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
- ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]]
- ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+ ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C6]], [[SHL1]]
+ ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX8-LABEL: name: test_intrinsic_round_v3s16
@@ -862,27 +854,19 @@ body: |
; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]]
; GFX8-NEXT: %25:_(s16) = disjoint G_OR [[AND4]], [[AND5]]
; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC2]], %25
- ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD]](s16)
; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16)
; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FADD2]](s16)
- ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; GFX8-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C6]]
- ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32)
+ ; GFX8-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C]](s32)
; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C6]]
- ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
- ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]]
- ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+ ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C6]], [[SHL1]]
+ ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX9-LABEL: name: test_intrinsic_round_v3s16
@@ -929,17 +913,10 @@ body: |
; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]]
; GFX9-NEXT: %25:_(s16) = disjoint G_OR [[AND4]], [[AND5]]
; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC2]], %25
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD]](s16), [[FADD1]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD2]](s16), [[TRUNC3]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[FADD2]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll
index 1da38f8..3c6cd49 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll
@@ -4829,36 +4829,33 @@ define amdgpu_ps <4 x float> @getresinfo_dmask0(<8 x i32> inreg %rsrc, <4 x floa
; GFX9: bb.1.main_body:
; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
- ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
- ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: $vgpr0 = COPY [[DEF]](s32)
+ ; GFX9-NEXT: $vgpr1 = COPY [[DEF]](s32)
+ ; GFX9-NEXT: $vgpr2 = COPY [[DEF]](s32)
+ ; GFX9-NEXT: $vgpr3 = COPY [[DEF]](s32)
; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
;
; GFX10NSA-LABEL: name: getresinfo_dmask0
; GFX10NSA: bb.1.main_body:
; GFX10NSA-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
; GFX10NSA-NEXT: {{ $}}
- ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
- ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
- ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32)
- ; GFX10NSA-NEXT: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX10NSA-NEXT: $vgpr0 = COPY [[DEF]](s32)
+ ; GFX10NSA-NEXT: $vgpr1 = COPY [[DEF]](s32)
+ ; GFX10NSA-NEXT: $vgpr2 = COPY [[DEF]](s32)
+ ; GFX10NSA-NEXT: $vgpr3 = COPY [[DEF]](s32)
; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
;
; GFX12-LABEL: name: getresinfo_dmask0
; GFX12: bb.1.main_body:
; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
; GFX12-NEXT: {{ $}}
- ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
- ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
- ; GFX12-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX12-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX12-NEXT: $vgpr2 = COPY [[UV2]](s32)
- ; GFX12-NEXT: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX12-NEXT: $vgpr0 = COPY [[DEF]](s32)
+ ; GFX12-NEXT: $vgpr1 = COPY [[DEF]](s32)
+ ; GFX12-NEXT: $vgpr2 = COPY [[DEF]](s32)
+ ; GFX12-NEXT: $vgpr3 = COPY [[DEF]](s32)
; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
main_body:
%mip = extractelement <2 x i16> %coords, i32 0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll
index 8de5666..5403663 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll
@@ -709,17 +709,14 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_0000(<8 x i32> inreg %rsrc,
; UNPACKED-NEXT: {{ $}}
; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
- ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; UNPACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
+ ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+ ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
+ ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY2]](s32)
+ ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
;
@@ -729,17 +726,14 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_0000(<8 x i32> inreg %rsrc,
; PACKED-NEXT: {{ $}}
; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
- ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; PACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
+ ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+ ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
+ ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY2]](s32)
+ ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
@@ -928,10 +922,9 @@ define amdgpu_ps <4 x half> @image_load_v4f16_dmask_0000(<8 x i32> inreg %rsrc,
; UNPACKED-NEXT: {{ $}}
; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; UNPACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
- ; UNPACKED-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
+ ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; UNPACKED-NEXT: $vgpr0 = COPY [[DEF]](<2 x s16>)
+ ; UNPACKED-NEXT: $vgpr1 = COPY [[DEF]](<2 x s16>)
; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
;
; PACKED-LABEL: name: image_load_v4f16_dmask_0000
@@ -940,10 +933,9 @@ define amdgpu_ps <4 x half> @image_load_v4f16_dmask_0000(<8 x i32> inreg %rsrc,
; PACKED-NEXT: {{ $}}
; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; PACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
- ; PACKED-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
+ ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; PACKED-NEXT: $vgpr0 = COPY [[DEF]](<2 x s16>)
+ ; PACKED-NEXT: $vgpr1 = COPY [[DEF]](<2 x s16>)
; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
ret <4 x half> %tex
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll
index c36b1bc..50ed411 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll
@@ -279,10 +279,9 @@ define amdgpu_ps <2 x float> @image_load_v2f32_dmask_0000(<8 x i32> inreg %rsrc,
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
- ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: $vgpr0 = COPY [[DEF]](s32)
+ ; GCN-NEXT: $vgpr1 = COPY [[DEF]](s32)
; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%tex = call <2 x float> @llvm.amdgcn.image.load.2d.v2f32.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
ret <2 x float> %tex
@@ -350,11 +349,10 @@ define amdgpu_ps <3 x float> @image_load_v3f32_dmask_0000(<8 x i32> inreg %rsrc,
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<3 x s32>)
- ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32)
+ ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: $vgpr0 = COPY [[DEF]](s32)
+ ; GCN-NEXT: $vgpr1 = COPY [[DEF]](s32)
+ ; GCN-NEXT: $vgpr2 = COPY [[DEF]](s32)
; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
%tex = call <3 x float> @llvm.amdgcn.image.load.2d.v3f32.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
ret <3 x float> %tex
@@ -453,12 +451,11 @@ define amdgpu_ps <4 x float> @image_load_v4f32_dmask_0000(<8 x i32> inreg %rsrc,
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
- ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
- ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32)
- ; GCN-NEXT: $vgpr3 = COPY [[UV3]](s32)
+ ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GCN-NEXT: $vgpr0 = COPY [[DEF]](s32)
+ ; GCN-NEXT: $vgpr1 = COPY [[DEF]](s32)
+ ; GCN-NEXT: $vgpr2 = COPY [[DEF]](s32)
+ ; GCN-NEXT: $vgpr3 = COPY [[DEF]](s32)
; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
%tex = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
ret <4 x float> %tex
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
index 9c28eb0..a5a76bb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
@@ -3503,24 +3503,12 @@ body: |
; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4)
; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
- ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
- ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]]
- ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+ ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BITCAST]](<2 x s16>)
; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; VI-LABEL: name: test_load_constant_v3s16_align8
@@ -3529,24 +3517,12 @@ body: |
; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4)
; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
- ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]]
- ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BITCAST]](<2 x s16>)
; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX9-LABEL: name: test_load_constant_v3s16_align8
@@ -3555,20 +3531,9 @@ body: |
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4)
; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(p4) = COPY $vgpr0_vgpr1
%1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 8, addrspace 4)
@@ -3594,28 +3559,22 @@ body: |
; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, align 4, addrspace 4)
- ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
- ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
- ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]]
+ ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
+ ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
- ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
- ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]]
+ ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
- ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
- ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]]
- ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+ ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]]
+ ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; VI-LABEL: name: test_load_constant_v3s16_align4
@@ -3629,28 +3588,22 @@ body: |
; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, align 4, addrspace 4)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
- ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]]
+ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
+ ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
- ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]]
+ ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
- ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]]
- ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]]
+ ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX9-LABEL: name: test_load_constant_v3s16_align4
@@ -3667,18 +3620,10 @@ body: |
; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, align 4, addrspace 4)
; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(p4) = COPY $vgpr0_vgpr1
@@ -3705,28 +3650,22 @@ body: |
; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
- ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
- ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
- ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]]
+ ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
+ ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
- ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
- ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]]
+ ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
- ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
- ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]]
- ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+ ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]]
+ ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; VI-LABEL: name: test_load_constant_v3s16_align2
@@ -3740,28 +3679,22 @@ body: |
; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
- ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]]
+ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
+ ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
- ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]]
+ ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
- ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]]
- ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]]
+ ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX9-LABEL: name: test_load_constant_v3s16_align2
@@ -3778,18 +3711,10 @@ body: |
; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load (s16) from unknown-address + 4, addrspace 4)
; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(p4) = COPY $vgpr0_vgpr1
@@ -3830,28 +3755,22 @@ body: |
; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]]
- ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]]
- ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]]
- ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C4]]
+ ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C4]]
+ ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL3]]
- ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]]
- ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
- ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C4]]
+ ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]]
- ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
- ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32)
- ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]]
- ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
+ ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C6]], [[SHL5]]
+ ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; VI-LABEL: name: test_load_constant_v3s16_align1
@@ -3879,28 +3798,22 @@ body: |
; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p4) :: (load (s8) from unknown-address + 5, addrspace 4)
; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]]
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]]
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]]
- ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C4]]
+ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C4]]
+ ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL3]]
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]]
- ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
- ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C4]]
+ ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]]
- ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
- ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32)
- ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]]
- ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
+ ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C6]], [[SHL5]]
+ ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX9-LABEL: name: test_load_constant_v3s16_align1
@@ -3931,18 +3844,10 @@ body: |
; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]]
; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32)
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(p4) = COPY $vgpr0_vgpr1
@@ -5286,33 +5191,21 @@ body: |
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4)
- ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>)
- ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; CI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64)
- ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+ ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
;
; VI-LABEL: name: test_load_constant_v3s64_align32
; VI: liveins: $vgpr0_vgpr1
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4)
- ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64)
- ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+ ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
;
; GFX9-LABEL: name: test_load_constant_v3s64_align32
; GFX9: liveins: $vgpr0_vgpr1
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4)
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>)
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64)
- ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+ ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
%0:_(p4) = COPY $vgpr0_vgpr1
%1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 4)
%2:_(<4 x s64>) = G_IMPLICIT_DEF
@@ -5335,9 +5228,8 @@ body: |
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (load (s64) from unknown-address + 16, addrspace 4)
; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; CI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; CI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; VI-LABEL: name: test_load_constant_v3s64_align8
@@ -5349,9 +5241,8 @@ body: |
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (load (s64) from unknown-address + 16, addrspace 4)
; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; VI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; GFX9-LABEL: name: test_load_constant_v3s64_align8
@@ -5363,9 +5254,8 @@ body: |
; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (load (s64) from unknown-address + 16, addrspace 4)
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
%0:_(p4) = COPY $vgpr0_vgpr1
%1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 8, addrspace 4)
@@ -5489,9 +5379,8 @@ body: |
; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
; CI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32)
; CI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]]
- ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64)
+ ; CI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[DEF]](s64)
; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; VI-LABEL: name: test_load_constant_v3s64_align1
@@ -5603,9 +5492,8 @@ body: |
; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
; VI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32)
; VI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]]
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64)
+ ; VI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[DEF]](s64)
; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; GFX9-LABEL: name: test_load_constant_v3s64_align1
@@ -5717,9 +5605,8 @@ body: |
; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
; GFX9-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32)
; GFX9-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]]
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[DEF]](s64)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
%0:_(p4) = COPY $vgpr0_vgpr1
%1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 1, addrspace 4)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
index 16ce48b..f1c9a77 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
@@ -6330,26 +6330,20 @@ body: |
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>)
; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
- ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]]
; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32)
; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
- ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]]
- ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+ ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]]
- ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]]
- ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
- ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]]
- ; CI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
+ ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
+ ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C3]], [[SHL2]]
+ ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; VI-LABEL: name: test_load_flat_v3s16_align8
@@ -6363,26 +6357,20 @@ body: |
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>)
; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]]
; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32)
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]]
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+ ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]]
- ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
- ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]]
- ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
+ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C3]], [[SHL2]]
+ ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX9PLUS-LABEL: name: test_load_flat_v3s16_align8
@@ -6391,20 +6379,9 @@ body: |
; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
; GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>))
; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
- ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
+ ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>)
; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX11PLUS-LABEL: name: test_load_flat_v3s16_align8
@@ -6413,20 +6390,9 @@ body: |
; GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
; GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>))
; GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
- ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX11PLUS-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX11PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX11PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
+ ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>)
; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX12-LABEL: name: test_load_flat_v3s16_align8
@@ -6435,20 +6401,9 @@ body: |
; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>))
; GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
- ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX12-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
+ ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>)
; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v3s16_align8
@@ -6457,20 +6412,9 @@ body: |
; UNALIGNED_GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
; UNALIGNED_GFX9PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>))
; UNALIGNED_GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
- ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; UNALIGNED_GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; UNALIGNED_GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; UNALIGNED_GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
+ ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; UNALIGNED_GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>)
; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v3s16_align8
@@ -6479,20 +6423,9 @@ body: |
; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>))
; UNALIGNED_GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
- ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX11PLUS-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; UNALIGNED_GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; UNALIGNED_GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
+ ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; UNALIGNED_GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>)
; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; UNALIGNED_GFX12-LABEL: name: test_load_flat_v3s16_align8
@@ -6501,20 +6434,9 @@ body: |
; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>))
; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
- ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX12-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; UNALIGNED_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; UNALIGNED_GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
+ ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>)
; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(p0) = COPY $vgpr0_vgpr1
%1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 8, addrspace 0)
@@ -6540,26 +6462,20 @@ body: |
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>)
; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
- ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]]
; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32)
; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
- ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]]
- ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+ ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]]
- ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]]
- ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
- ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]]
- ; CI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
+ ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
+ ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C3]], [[SHL2]]
+ ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; VI-LABEL: name: test_load_flat_v3s16_align4
@@ -6573,26 +6489,20 @@ body: |
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>)
; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]]
; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32)
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]]
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+ ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]]
- ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
- ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]]
- ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
+ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C3]], [[SHL2]]
+ ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX9PLUS-LABEL: name: test_load_flat_v3s16_align4
@@ -6609,18 +6519,10 @@ body: |
; GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
; GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 4)
; GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX9PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -6638,18 +6540,10 @@ body: |
; GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
; GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 4)
; GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX11PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX11PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX11PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX11PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX11PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -6667,18 +6561,10 @@ body: |
; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 4)
; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -6696,18 +6582,10 @@ body: |
; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
; UNALIGNED_GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 4)
; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNALIGNED_GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; UNALIGNED_GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -6725,18 +6603,10 @@ body: |
; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 4)
; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNALIGNED_GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; UNALIGNED_GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -6754,18 +6624,10 @@ body: |
; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 4)
; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; UNALIGNED_GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; UNALIGNED_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; UNALIGNED_GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(p0) = COPY $vgpr0_vgpr1
@@ -6792,28 +6654,22 @@ body: |
; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
- ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
- ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
- ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]]
+ ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
+ ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
- ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
- ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]]
+ ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
- ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
- ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]]
- ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+ ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]]
+ ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; VI-LABEL: name: test_load_flat_v3s16_align2
@@ -6827,28 +6683,22 @@ body: |
; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
- ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]]
+ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
+ ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
- ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]]
+ ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
- ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]]
- ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]]
+ ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX9PLUS-LABEL: name: test_load_flat_v3s16_align2
@@ -6865,18 +6715,10 @@ body: |
; GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
; GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
; GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX9PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -6894,18 +6736,10 @@ body: |
; GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
; GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
; GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX11PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX11PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX11PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX11PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX11PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -6923,18 +6757,10 @@ body: |
; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -6952,18 +6778,10 @@ body: |
; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
; UNALIGNED_GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; UNALIGNED_GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNALIGNED_GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; UNALIGNED_GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -6981,18 +6799,10 @@ body: |
; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNALIGNED_GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; UNALIGNED_GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -7010,18 +6820,10 @@ body: |
; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4)
; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; UNALIGNED_GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; UNALIGNED_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; UNALIGNED_GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(p0) = COPY $vgpr0_vgpr1
@@ -7062,28 +6864,22 @@ body: |
; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]]
- ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]]
- ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]]
- ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C4]]
+ ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C4]]
+ ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL3]]
- ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]]
- ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
- ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C4]]
+ ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]]
- ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
- ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32)
- ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]]
- ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
+ ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C6]], [[SHL5]]
+ ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; VI-LABEL: name: test_load_flat_v3s16_align1
@@ -7111,28 +6907,22 @@ body: |
; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5)
; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]]
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]]
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]]
- ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C4]]
+ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C4]]
+ ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL3]]
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]]
- ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
- ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C4]]
+ ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]]
- ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
- ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32)
- ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]]
- ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
+ ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C6]], [[SHL5]]
+ ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX9PLUS-LABEL: name: test_load_flat_v3s16_align1
@@ -7149,18 +6939,10 @@ body: |
; GFX9PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
; GFX9PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 1)
; GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX9PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -7178,18 +6960,10 @@ body: |
; GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
; GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 1)
; GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX11PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX11PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX11PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX11PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX11PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -7207,18 +6981,10 @@ body: |
; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 1)
; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -7250,18 +7016,10 @@ body: |
; UNALIGNED_GFX9PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
; UNALIGNED_GFX9PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]]
; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32)
- ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; UNALIGNED_GFX9PLUS-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNALIGNED_GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; UNALIGNED_GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; UNALIGNED_GFX9PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; UNALIGNED_GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -7293,18 +7051,10 @@ body: |
; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]]
; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32)
- ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNALIGNED_GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; UNALIGNED_GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -7336,18 +7086,10 @@ body: |
; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]]
; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32)
- ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; UNALIGNED_GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; UNALIGNED_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; UNALIGNED_GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(p0) = COPY $vgpr0_vgpr1
@@ -9927,9 +9669,8 @@ body: |
; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s32) from unknown-address + 20)
; CI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
- ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64)
+ ; CI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[DEF]](s64)
; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; VI-LABEL: name: test_load_flat_v3s64_align32
@@ -9953,9 +9694,8 @@ body: |
; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s32) from unknown-address + 20)
; VI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64)
+ ; VI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[DEF]](s64)
; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; GFX9PLUS-LABEL: name: test_load_flat_v3s64_align32
@@ -9967,9 +9707,8 @@ body: |
; GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
; GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 16)
; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; GFX11PLUS-LABEL: name: test_load_flat_v3s64_align32
@@ -9981,9 +9720,8 @@ body: |
; GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
; GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 16)
; GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX11PLUS-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; GFX12-LABEL: name: test_load_flat_v3s64_align32
@@ -9995,9 +9733,8 @@ body: |
; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 16)
; GFX12-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX12-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v3s64_align32
@@ -10009,9 +9746,8 @@ body: |
; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 16)
; UNALIGNED_GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v3s64_align32
@@ -10023,9 +9759,8 @@ body: |
; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 16)
; UNALIGNED_GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX11PLUS-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; UNALIGNED_GFX12-LABEL: name: test_load_flat_v3s64_align32
@@ -10037,9 +9772,8 @@ body: |
; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 16)
; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX12-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
%0:_(p0) = COPY $vgpr0_vgpr1
%1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 0)
@@ -10075,9 +9809,8 @@ body: |
; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s32) from unknown-address + 20)
; CI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
- ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64)
+ ; CI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[DEF]](s64)
; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; VI-LABEL: name: test_load_flat_v3s64_align8
@@ -10101,9 +9834,8 @@ body: |
; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[PTR_ADD3]], [[C]](s64)
; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s32) from unknown-address + 20)
; VI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64)
+ ; VI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[DEF]](s64)
; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; GFX9PLUS-LABEL: name: test_load_flat_v3s64_align8
@@ -10115,9 +9847,8 @@ body: |
; GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
; GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16)
; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; GFX11PLUS-LABEL: name: test_load_flat_v3s64_align8
@@ -10129,9 +9860,8 @@ body: |
; GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
; GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16)
; GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX11PLUS-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; GFX12-LABEL: name: test_load_flat_v3s64_align8
@@ -10143,9 +9873,8 @@ body: |
; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16)
; GFX12-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX12-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v3s64_align8
@@ -10157,9 +9886,8 @@ body: |
; UNALIGNED_GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
; UNALIGNED_GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16)
; UNALIGNED_GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v3s64_align8
@@ -10171,9 +9899,8 @@ body: |
; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16)
; UNALIGNED_GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX11PLUS-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; UNALIGNED_GFX12-LABEL: name: test_load_flat_v3s64_align8
@@ -10185,9 +9912,8 @@ body: |
; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16)
; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX12-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
%0:_(p0) = COPY $vgpr0_vgpr1
%1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 8, addrspace 0)
@@ -10299,9 +10025,8 @@ body: |
; CI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[OR16]], [[C3]](s32)
; CI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]]
; CI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR14]](s32), [[OR17]](s32)
- ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64)
+ ; CI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[DEF]](s64)
; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; VI-LABEL: name: test_load_flat_v3s64_align1
@@ -10401,9 +10126,8 @@ body: |
; VI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[OR16]], [[C3]](s32)
; VI-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]]
; VI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR14]](s32), [[OR17]](s32)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64)
+ ; VI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[DEF]](s64)
; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; GFX9PLUS-LABEL: name: test_load_flat_v3s64_align1
@@ -10415,9 +10139,8 @@ body: |
; GFX9PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
; GFX9PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 1)
; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; GFX11PLUS-LABEL: name: test_load_flat_v3s64_align1
@@ -10429,9 +10152,8 @@ body: |
; GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
; GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 1)
; GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX11PLUS-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
; GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; GFX12-LABEL: name: test_load_flat_v3s64_align1
@@ -10443,9 +10165,8 @@ body: |
; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 1)
; GFX12-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX12-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v3s64_align1
@@ -10557,9 +10278,8 @@ body: |
; UNALIGNED_GFX9PLUS-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
; UNALIGNED_GFX9PLUS-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32)
; UNALIGNED_GFX9PLUS-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]]
- ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64)
+ ; UNALIGNED_GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; UNALIGNED_GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[DEF]](s64)
; UNALIGNED_GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; UNALIGNED_GFX11PLUS-LABEL: name: test_load_flat_v3s64_align1
@@ -10671,9 +10391,8 @@ body: |
; UNALIGNED_GFX11PLUS-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
; UNALIGNED_GFX11PLUS-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32)
; UNALIGNED_GFX11PLUS-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]]
- ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64)
+ ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[DEF]](s64)
; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; UNALIGNED_GFX12-LABEL: name: test_load_flat_v3s64_align1
@@ -10785,9 +10504,8 @@ body: |
; UNALIGNED_GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
; UNALIGNED_GFX12-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32)
; UNALIGNED_GFX12-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]]
- ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64)
+ ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[DEF]](s64)
; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
%0:_(p0) = COPY $vgpr0_vgpr1
%1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 1, addrspace 0)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
index 1b72ce5..a15c602 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
@@ -5266,24 +5266,12 @@ body: |
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
- ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]]
- ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BITCAST]](<2 x s16>)
; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; CI-HSA-LABEL: name: test_load_global_v3s16_align8
@@ -5292,24 +5280,12 @@ body: |
; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
- ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
- ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]]
- ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+ ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BITCAST]](<2 x s16>)
; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; CI-MESA-LABEL: name: test_load_global_v3s16_align8
@@ -5318,24 +5294,12 @@ body: |
; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
- ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-MESA-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
- ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]]
- ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+ ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BITCAST]](<2 x s16>)
; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; VI-LABEL: name: test_load_global_v3s16_align8
@@ -5344,24 +5308,12 @@ body: |
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
- ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]]
- ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BITCAST]](<2 x s16>)
; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX9-HSA-LABEL: name: test_load_global_v3s16_align8
@@ -5370,20 +5322,9 @@ body: |
; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
- ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-HSA-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
+ ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>)
; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX9-MESA-LABEL: name: test_load_global_v3s16_align8
@@ -5392,20 +5333,9 @@ body: |
; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1)
; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
- ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-MESA-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
+ ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>)
; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 8, addrspace 1)
@@ -5431,28 +5361,22 @@ body: |
; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
- ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]]
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
+ ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
- ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]]
+ ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
- ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
- ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]]
- ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+ ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]]
+ ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; CI-HSA-LABEL: name: test_load_global_v3s16_align4
@@ -5466,28 +5390,22 @@ body: |
; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CI-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
; CI-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
- ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
- ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
- ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]]
+ ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
+ ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
- ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
- ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]]
+ ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
- ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
- ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]]
- ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+ ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]]
+ ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; CI-MESA-LABEL: name: test_load_global_v3s16_align4
@@ -5501,28 +5419,22 @@ body: |
; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
- ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
- ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
- ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]]
+ ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
+ ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
- ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
- ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]]
+ ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
- ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
- ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]]
- ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+ ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]]
+ ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; VI-LABEL: name: test_load_global_v3s16_align4
@@ -5536,28 +5448,22 @@ body: |
; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
- ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]]
+ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
+ ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
- ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]]
+ ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
- ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]]
- ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]]
+ ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX9-HSA-LABEL: name: test_load_global_v3s16_align4
@@ -5574,18 +5480,10 @@ body: |
; GFX9-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
; GFX9-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-HSA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -5603,18 +5501,10 @@ body: |
; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 4, addrspace 1)
; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(p1) = COPY $vgpr0_vgpr1
@@ -5641,28 +5531,22 @@ body: |
; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
- ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]]
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
+ ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
- ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]]
+ ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
- ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
- ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]]
- ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+ ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]]
+ ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; CI-HSA-LABEL: name: test_load_global_v3s16_align2
@@ -5676,28 +5560,22 @@ body: |
; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CI-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
; CI-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
- ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
- ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
- ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]]
+ ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
+ ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
- ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
- ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]]
+ ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
- ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
- ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]]
- ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+ ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]]
+ ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; CI-MESA-LABEL: name: test_load_global_v3s16_align2
@@ -5711,28 +5589,22 @@ body: |
; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CI-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
- ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
- ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
- ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; CI-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]]
+ ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
+ ; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
- ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
- ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]]
+ ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
- ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
- ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]]
- ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+ ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]]
+ ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; VI-LABEL: name: test_load_global_v3s16_align2
@@ -5746,28 +5618,22 @@ body: |
; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
- ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]]
+ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
+ ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
- ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]]
+ ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
- ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]]
- ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]]
+ ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX9-HSA-LABEL: name: test_load_global_v3s16_align2
@@ -5784,18 +5650,10 @@ body: |
; GFX9-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
; GFX9-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-HSA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -5813,18 +5671,10 @@ body: |
; GFX9-MESA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
; GFX9-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, addrspace 1)
; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-MESA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(p1) = COPY $vgpr0_vgpr1
@@ -5865,28 +5715,22 @@ body: |
; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]]
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]]
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]]
- ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C4]]
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C4]]
+ ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL3]]
- ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]]
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
- ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C4]]
+ ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]]
- ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
- ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32)
- ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]]
- ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
+ ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C6]], [[SHL5]]
+ ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; CI-HSA-LABEL: name: test_load_global_v3s16_align1
@@ -5900,28 +5744,22 @@ body: |
; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CI-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
; CI-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 1, addrspace 1)
- ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
- ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
- ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; CI-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]]
+ ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
+ ; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
- ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
- ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]]
+ ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
- ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
- ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]]
- ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+ ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]]
+ ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; CI-MESA-LABEL: name: test_load_global_v3s16_align1
@@ -5949,28 +5787,22 @@ body: |
; CI-MESA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]]
- ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]]
- ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]]
- ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C4]]
+ ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C4]]
+ ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL3]]
- ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]]
- ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
- ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C4]]
+ ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]]
- ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
- ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32)
- ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]]
- ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
+ ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C6]], [[SHL5]]
+ ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; VI-LABEL: name: test_load_global_v3s16_align1
@@ -5998,28 +5830,22 @@ body: |
; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p1) :: (load (s8) from unknown-address + 5, addrspace 1)
; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]]
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]]
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]]
- ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C4]]
+ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C4]]
+ ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL3]]
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]]
- ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
- ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C4]]
+ ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]]
- ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
- ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32)
- ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]]
- ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
+ ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C6]], [[SHL5]]
+ ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX9-HSA-LABEL: name: test_load_global_v3s16_align1
@@ -6036,18 +5862,10 @@ body: |
; GFX9-HSA-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
; GFX9-HSA-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s16) from unknown-address + 4, align 1, addrspace 1)
; GFX9-HSA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-HSA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-HSA-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-HSA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -6079,18 +5897,10 @@ body: |
; GFX9-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
; GFX9-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]]
; GFX9-MESA-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32)
- ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-MESA-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(p1) = COPY $vgpr0_vgpr1
@@ -6557,20 +6367,9 @@ body: |
; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]]
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C]]
- ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
- ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; SI-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
; SI-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; SI-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>)
;
; CI-HSA-LABEL: name: test_load_global_v5s16_align16
; CI-HSA: liveins: $vgpr0_vgpr1
@@ -6579,20 +6378,9 @@ body: |
; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
- ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]]
- ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C]]
- ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
- ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CI-HSA-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
; CI-HSA-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; CI-HSA-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>)
;
; CI-MESA-LABEL: name: test_load_global_v5s16_align16
; CI-MESA: liveins: $vgpr0_vgpr1
@@ -6601,20 +6389,9 @@ body: |
; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
- ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CI-MESA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]]
- ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C]]
- ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
- ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CI-MESA-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
; CI-MESA-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; CI-MESA-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>)
;
; VI-LABEL: name: test_load_global_v5s16_align16
; VI: liveins: $vgpr0_vgpr1
@@ -6623,20 +6400,9 @@ body: |
; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]]
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C]]
- ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
- ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; VI-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
; VI-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; VI-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>)
;
; GFX9-HSA-LABEL: name: test_load_global_v5s16_align16
; GFX9-HSA: liveins: $vgpr0_vgpr1
@@ -6645,18 +6411,11 @@ body: |
; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
- ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
; GFX9-HSA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
; GFX9-HSA-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
- ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; GFX9-HSA-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; GFX9-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
- ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-HSA-NEXT: $vgpr0 = COPY [[UV4]](<2 x s16>)
; GFX9-HSA-NEXT: $vgpr1 = COPY [[UV9]](<2 x s16>)
- ; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR]](<2 x s16>)
+ ; GFX9-HSA-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>)
;
; GFX9-MESA-LABEL: name: test_load_global_v5s16_align16
; GFX9-MESA: liveins: $vgpr0_vgpr1
@@ -6665,18 +6424,11 @@ body: |
; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
- ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
; GFX9-MESA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
; GFX9-MESA-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
- ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; GFX9-MESA-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; GFX9-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
- ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-MESA-NEXT: $vgpr0 = COPY [[UV4]](<2 x s16>)
; GFX9-MESA-NEXT: $vgpr1 = COPY [[UV9]](<2 x s16>)
- ; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR]](<2 x s16>)
+ ; GFX9-MESA-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<5 x s16>) = G_LOAD %0 :: (load (<5 x s16>), align 16, addrspace 1)
%2:_(<5 x s16>) = G_IMPLICIT_DEF
@@ -6703,19 +6455,16 @@ body: |
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C3]](s32)
; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; SI-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
; SI-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; SI-NEXT: $vgpr2 = COPY [[BITCAST1]](<2 x s16>)
+ ; SI-NEXT: $vgpr2 = COPY [[BITCAST]](<2 x s16>)
;
; CI-HSA-LABEL: name: test_load_global_v5s16_align8
; CI-HSA: liveins: $vgpr0_vgpr1
@@ -6734,29 +6483,26 @@ body: |
; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; CI-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64)
; CI-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
- ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
- ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
- ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
+ ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
;
; CI-MESA-LABEL: name: test_load_global_v5s16_align8
; CI-MESA: liveins: $vgpr0_vgpr1
@@ -6775,29 +6521,26 @@ body: |
; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64)
; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
- ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
- ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
- ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
+ ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
;
; VI-LABEL: name: test_load_global_v5s16_align8
; VI: liveins: $vgpr0_vgpr1
@@ -6816,29 +6559,26 @@ body: |
; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64)
; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 8, addrspace 1)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
- ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
+ ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; VI-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; VI-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
;
; GFX9-HSA-LABEL: name: test_load_global_v5s16_align8
; GFX9-HSA: liveins: $vgpr0_vgpr1
@@ -6864,11 +6604,8 @@ body: |
; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-HSA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[DEF]](s16)
; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>)
; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>)
@@ -6897,11 +6634,8 @@ body: |
; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[DEF]](s16)
; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>)
; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>)
@@ -6931,19 +6665,16 @@ body: |
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C1]]
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C3]](s32)
; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; SI-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
; SI-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; SI-NEXT: $vgpr2 = COPY [[BITCAST1]](<2 x s16>)
+ ; SI-NEXT: $vgpr2 = COPY [[BITCAST]](<2 x s16>)
;
; CI-HSA-LABEL: name: test_load_global_v5s16_align4
; CI-HSA: liveins: $vgpr0_vgpr1
@@ -6962,29 +6693,26 @@ body: |
; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; CI-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64)
; CI-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
- ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
- ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
- ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
+ ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
;
; CI-MESA-LABEL: name: test_load_global_v5s16_align4
; CI-MESA: liveins: $vgpr0_vgpr1
@@ -7003,29 +6731,26 @@ body: |
; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64)
; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
- ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
- ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
- ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
+ ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
;
; VI-LABEL: name: test_load_global_v5s16_align4
; VI: liveins: $vgpr0_vgpr1
@@ -7044,29 +6769,26 @@ body: |
; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64)
; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 4, addrspace 1)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
- ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
+ ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; VI-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; VI-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
;
; GFX9-HSA-LABEL: name: test_load_global_v5s16_align4
; GFX9-HSA: liveins: $vgpr0_vgpr1
@@ -7092,11 +6814,8 @@ body: |
; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-HSA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[DEF]](s16)
; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>)
; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>)
@@ -7125,11 +6844,8 @@ body: |
; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[DEF]](s16)
; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>)
; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>)
@@ -7167,29 +6883,26 @@ body: |
; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64)
; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
- ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
- ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
+ ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; SI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; SI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; SI-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; SI-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
;
; CI-HSA-LABEL: name: test_load_global_v5s16_align2
; CI-HSA: liveins: $vgpr0_vgpr1
@@ -7208,29 +6921,26 @@ body: |
; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; CI-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64)
; CI-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
- ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
- ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
- ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
+ ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
;
; CI-MESA-LABEL: name: test_load_global_v5s16_align2
; CI-MESA: liveins: $vgpr0_vgpr1
@@ -7249,29 +6959,26 @@ body: |
; CI-MESA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; CI-MESA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64)
; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
- ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
- ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
- ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
+ ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
;
; VI-LABEL: name: test_load_global_v5s16_align2
; VI: liveins: $vgpr0_vgpr1
@@ -7290,29 +6997,26 @@ body: |
; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64)
; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, addrspace 1)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
- ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
+ ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; VI-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; VI-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
;
; GFX9-HSA-LABEL: name: test_load_global_v5s16_align2
; GFX9-HSA: liveins: $vgpr0_vgpr1
@@ -7338,11 +7042,8 @@ body: |
; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-HSA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[DEF]](s16)
; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>)
; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>)
@@ -7371,11 +7072,8 @@ body: |
; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[DEF]](s16)
; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>)
; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>)
@@ -7435,29 +7133,26 @@ body: |
; SI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32)
; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD4]]
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C6]]
; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C6]]
; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL5]]
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C6]]
; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C6]]
; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL6]]
- ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+ ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C6]]
- ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
- ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
+ ; SI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32)
; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL7]]
- ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
- ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; SI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+ ; SI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; SI-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; SI-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
;
; CI-HSA-LABEL: name: test_load_global_v5s16_align1
; CI-HSA: liveins: $vgpr0_vgpr1
@@ -7476,29 +7171,26 @@ body: |
; CI-HSA-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; CI-HSA-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C3]](s64)
; CI-HSA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p1) :: (load (s16) from unknown-address + 8, align 1, addrspace 1)
- ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]]
; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]]
; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]]
; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]]
; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]]
- ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]]
- ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32)
+ ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
;
; CI-MESA-LABEL: name: test_load_global_v5s16_align1
; CI-MESA: liveins: $vgpr0_vgpr1
@@ -7539,29 +7231,26 @@ body: |
; CI-MESA-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
; CI-MESA-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32)
; CI-MESA-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD4]]
- ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C6]]
; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C6]]
; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL5]]
- ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C6]]
; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C6]]
; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL6]]
- ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+ ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C6]]
- ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
- ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
+ ; CI-MESA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32)
; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL7]]
- ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
- ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+ ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
;
; VI-LABEL: name: test_load_global_v5s16_align1
; VI: liveins: $vgpr0_vgpr1
@@ -7602,29 +7291,26 @@ body: |
; VI-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p1) :: (load (s8) from unknown-address + 9, addrspace 1)
; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32)
; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD4]]
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C6]]
; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C6]]
; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL5]]
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C6]]
; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C6]]
; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL6]]
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+ ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C6]]
- ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
- ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
+ ; VI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32)
; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL7]]
- ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
- ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+ ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; VI-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; VI-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
;
; GFX9-HSA-LABEL: name: test_load_global_v5s16_align1
; GFX9-HSA: liveins: $vgpr0_vgpr1
@@ -7650,11 +7336,8 @@ body: |
; GFX9-HSA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-HSA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[DEF]](s16)
; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>)
; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>)
@@ -7705,11 +7388,8 @@ body: |
; GFX9-MESA-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[OR4]](s32)
; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-MESA-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[DEF]](s16)
; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>)
; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>)
@@ -8311,21 +7991,10 @@ body: |
; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
- ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]]
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C]]
- ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
- ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; SI-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
; SI-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
; SI-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>)
- ; SI-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>)
+ ; SI-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>)
;
; CI-HSA-LABEL: name: test_load_global_v7s16_align16
; CI-HSA: liveins: $vgpr0_vgpr1
@@ -8334,21 +8003,10 @@ body: |
; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
- ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
- ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]]
- ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C]]
- ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
- ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CI-HSA-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
; CI-HSA-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
; CI-HSA-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>)
- ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>)
+ ; CI-HSA-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>)
;
; CI-MESA-LABEL: name: test_load_global_v7s16_align16
; CI-MESA: liveins: $vgpr0_vgpr1
@@ -8357,21 +8015,10 @@ body: |
; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
- ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; CI-MESA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
- ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]]
- ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C]]
- ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
- ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CI-MESA-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
; CI-MESA-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
; CI-MESA-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>)
- ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>)
+ ; CI-MESA-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>)
;
; VI-LABEL: name: test_load_global_v7s16_align16
; VI: liveins: $vgpr0_vgpr1
@@ -8380,21 +8027,10 @@ body: |
; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]]
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C]]
- ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
- ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; VI-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
; VI-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
; VI-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>)
- ; VI-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>)
+ ; VI-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>)
;
; GFX9-HSA-LABEL: name: test_load_global_v7s16_align16
; GFX9-HSA: liveins: $vgpr0_vgpr1
@@ -8403,20 +8039,13 @@ body: |
; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
- ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX9-HSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
; GFX9-HSA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
; GFX9-HSA-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
; GFX9-HSA-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
- ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; GFX9-HSA-NEXT: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
- ; GFX9-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>)
- ; GFX9-HSA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-HSA-NEXT: $vgpr0 = COPY [[UV4]](<2 x s16>)
; GFX9-HSA-NEXT: $vgpr1 = COPY [[UV9]](<2 x s16>)
; GFX9-HSA-NEXT: $vgpr2 = COPY [[UV14]](<2 x s16>)
- ; GFX9-HSA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR]](<2 x s16>)
+ ; GFX9-HSA-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>)
;
; GFX9-MESA-LABEL: name: test_load_global_v7s16_align16
; GFX9-MESA: liveins: $vgpr0_vgpr1
@@ -8425,20 +8054,13 @@ body: |
; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1)
; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>)
; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
- ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX9-MESA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
; GFX9-MESA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
; GFX9-MESA-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
; GFX9-MESA-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>)
- ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; GFX9-MESA-NEXT: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
- ; GFX9-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>)
- ; GFX9-MESA-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-MESA-NEXT: $vgpr0 = COPY [[UV4]](<2 x s16>)
; GFX9-MESA-NEXT: $vgpr1 = COPY [[UV9]](<2 x s16>)
; GFX9-MESA-NEXT: $vgpr2 = COPY [[UV14]](<2 x s16>)
- ; GFX9-MESA-NEXT: $vgpr3 = COPY [[BUILD_VECTOR]](<2 x s16>)
+ ; GFX9-MESA-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<7 x s16>) = G_LOAD %0 :: (load (<7 x s16>), align 16, addrspace 1)
%2:_(<7 x s16>) = G_IMPLICIT_DEF
@@ -8480,35 +8102,32 @@ body: |
; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C5]](s64)
; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
- ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
- ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32)
+ ; SI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32)
; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
- ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; SI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
- ; SI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
+ ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; SI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; SI-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; SI-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
+ ; SI-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>)
;
; CI-HSA-LABEL: name: test_load_global_v7s16_align8
; CI-HSA: liveins: $vgpr0_vgpr1
@@ -8533,35 +8152,32 @@ body: |
; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
; CI-HSA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C5]](s64)
; CI-HSA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
- ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
- ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
- ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
- ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32)
+ ; CI-HSA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32)
; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
- ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
- ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
+ ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
+ ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>)
;
; CI-MESA-LABEL: name: test_load_global_v7s16_align8
; CI-MESA: liveins: $vgpr0_vgpr1
@@ -8586,35 +8202,32 @@ body: |
; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C5]](s64)
; CI-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
- ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
- ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
- ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
- ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32)
+ ; CI-MESA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32)
; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
- ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
- ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
+ ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
+ ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>)
;
; VI-LABEL: name: test_load_global_v7s16_align8
; VI: liveins: $vgpr0_vgpr1
@@ -8639,35 +8252,32 @@ body: |
; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C5]](s64)
; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
- ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
- ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32)
+ ; VI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32)
; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
- ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
- ; VI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; VI-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; VI-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
+ ; VI-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>)
;
; GFX9-HSA-LABEL: name: test_load_global_v7s16_align8
; GFX9-HSA: liveins: $vgpr0_vgpr1
@@ -8702,11 +8312,8 @@ body: |
; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
- ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
- ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-HSA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-HSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16)
+ ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-HSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16)
; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>)
; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>)
@@ -8745,11 +8352,8 @@ body: |
; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
- ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
- ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-MESA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-MESA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16)
+ ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-MESA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16)
; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>)
; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>)
@@ -8795,35 +8399,32 @@ body: |
; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C5]](s64)
; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
- ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
- ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32)
+ ; SI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32)
; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
- ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; SI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
- ; SI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
+ ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; SI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; SI-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; SI-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
+ ; SI-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>)
;
; CI-HSA-LABEL: name: test_load_global_v7s16_align4
; CI-HSA: liveins: $vgpr0_vgpr1
@@ -8848,35 +8449,32 @@ body: |
; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
; CI-HSA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C5]](s64)
; CI-HSA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
- ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
- ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
- ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
- ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32)
+ ; CI-HSA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32)
; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
- ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
- ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
+ ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
+ ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>)
;
; CI-MESA-LABEL: name: test_load_global_v7s16_align4
; CI-MESA: liveins: $vgpr0_vgpr1
@@ -8901,35 +8499,32 @@ body: |
; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C5]](s64)
; CI-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
- ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
- ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
- ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
- ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32)
+ ; CI-MESA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32)
; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
- ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
- ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
+ ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
+ ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>)
;
; VI-LABEL: name: test_load_global_v7s16_align4
; VI: liveins: $vgpr0_vgpr1
@@ -8954,35 +8549,32 @@ body: |
; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C5]](s64)
; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 4, addrspace 1)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
- ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
- ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32)
+ ; VI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32)
; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
- ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
- ; VI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; VI-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; VI-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
+ ; VI-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>)
;
; GFX9-HSA-LABEL: name: test_load_global_v7s16_align4
; GFX9-HSA: liveins: $vgpr0_vgpr1
@@ -9017,11 +8609,8 @@ body: |
; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
- ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
- ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-HSA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-HSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16)
+ ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-HSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16)
; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>)
; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>)
@@ -9060,11 +8649,8 @@ body: |
; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
- ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
- ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-MESA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-MESA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16)
+ ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-MESA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16)
; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>)
; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>)
@@ -9110,35 +8696,32 @@ body: |
; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
; SI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C5]](s64)
; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
- ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
- ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32)
+ ; SI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32)
; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
- ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; SI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
- ; SI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
+ ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; SI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; SI-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; SI-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
+ ; SI-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>)
;
; CI-HSA-LABEL: name: test_load_global_v7s16_align2
; CI-HSA: liveins: $vgpr0_vgpr1
@@ -9163,35 +8746,32 @@ body: |
; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
; CI-HSA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C5]](s64)
; CI-HSA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1)
- ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
- ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
- ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
- ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32)
+ ; CI-HSA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32)
; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
- ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
- ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
+ ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
+ ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>)
;
; CI-MESA-LABEL: name: test_load_global_v7s16_align2
; CI-MESA: liveins: $vgpr0_vgpr1
@@ -9216,35 +8796,32 @@ body: |
; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
; CI-MESA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C5]](s64)
; CI-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1)
- ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
- ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
- ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
- ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32)
+ ; CI-MESA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32)
; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
- ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
- ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
+ ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
+ ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>)
;
; VI-LABEL: name: test_load_global_v7s16_align2
; VI: liveins: $vgpr0_vgpr1
@@ -9269,35 +8846,32 @@ body: |
; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
; VI-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C5]](s64)
; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, addrspace 1)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
- ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
- ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32)
+ ; VI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32)
; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
- ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
- ; VI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; VI-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; VI-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
+ ; VI-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>)
;
; GFX9-HSA-LABEL: name: test_load_global_v7s16_align2
; GFX9-HSA: liveins: $vgpr0_vgpr1
@@ -9332,11 +8906,8 @@ body: |
; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
- ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
- ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-HSA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-HSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16)
+ ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-HSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16)
; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>)
; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>)
@@ -9375,11 +8946,8 @@ body: |
; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
- ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
- ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-MESA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-MESA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16)
+ ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-MESA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16)
; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>)
; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>)
@@ -9455,35 +9023,32 @@ body: |
; SI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LOAD6]], [[C1]](s32)
; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]]
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; SI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C8]]
; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C8]]
; SI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C9]](s32)
; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL7]]
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C8]]
; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C8]]
; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C9]](s32)
; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL8]]
- ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
+ ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C8]]
; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR5]], [[C8]]
; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C9]](s32)
; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL9]]
- ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
+ ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[OR6]], [[C8]]
- ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C8]]
- ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C9]](s32)
+ ; SI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[C10]], [[C9]](s32)
; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL10]]
- ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
- ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; SI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; SI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
- ; SI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
+ ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
+ ; SI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; SI-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; SI-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
+ ; SI-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>)
;
; CI-HSA-LABEL: name: test_load_global_v7s16_align1
; CI-HSA: liveins: $vgpr0_vgpr1
@@ -9508,35 +9073,32 @@ body: |
; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
; CI-HSA-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C5]](s64)
; CI-HSA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p1) :: (load (s16) from unknown-address + 12, align 1, addrspace 1)
- ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
- ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]]
; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]]
; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32)
; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]]
; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]]
; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]]
; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]]
; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32)
; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]]
- ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]]
- ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32)
+ ; CI-HSA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C8]], [[C7]](s32)
; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
- ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
- ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
+ ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; CI-HSA-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; CI-HSA-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
+ ; CI-HSA-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>)
;
; CI-MESA-LABEL: name: test_load_global_v7s16_align1
; CI-MESA: liveins: $vgpr0_vgpr1
@@ -9591,35 +9153,32 @@ body: |
; CI-MESA-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LOAD6]], [[C1]](s32)
; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]]
- ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
- ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CI-MESA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C8]]
; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C8]]
; CI-MESA-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C9]](s32)
; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL7]]
- ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+ ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C8]]
; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C8]]
; CI-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C9]](s32)
; CI-MESA-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL8]]
- ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
+ ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C8]]
; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR5]], [[C8]]
; CI-MESA-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C9]](s32)
; CI-MESA-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL9]]
- ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
+ ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[OR6]], [[C8]]
- ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C8]]
- ; CI-MESA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C9]](s32)
+ ; CI-MESA-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-MESA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[C10]], [[C9]](s32)
; CI-MESA-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL10]]
- ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
- ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
- ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
+ ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
+ ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; CI-MESA-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; CI-MESA-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
+ ; CI-MESA-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>)
;
; VI-LABEL: name: test_load_global_v7s16_align1
; VI: liveins: $vgpr0_vgpr1
@@ -9674,35 +9233,32 @@ body: |
; VI-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p1) :: (load (s8) from unknown-address + 13, addrspace 1)
; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LOAD6]], [[C1]](s32)
; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]]
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; VI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C8]]
; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C8]]
; VI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C9]](s32)
; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL7]]
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C8]]
; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C8]]
; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C9]](s32)
; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL8]]
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
+ ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C8]]
; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR5]], [[C8]]
; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C9]](s32)
; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL9]]
- ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
+ ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[OR6]], [[C8]]
- ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C8]]
- ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C9]](s32)
+ ; VI-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[C10]], [[C9]](s32)
; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL10]]
- ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
- ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
- ; VI-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
- ; VI-NEXT: $vgpr2 = COPY [[BITCAST3]](<2 x s16>)
- ; VI-NEXT: $vgpr3 = COPY [[BITCAST4]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
+ ; VI-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
+ ; VI-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>)
+ ; VI-NEXT: $vgpr2 = COPY [[BITCAST2]](<2 x s16>)
+ ; VI-NEXT: $vgpr3 = COPY [[BITCAST3]](<2 x s16>)
;
; GFX9-HSA-LABEL: name: test_load_global_v7s16_align1
; GFX9-HSA: liveins: $vgpr0_vgpr1
@@ -9737,11 +9293,8 @@ body: |
; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
- ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
- ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-HSA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-HSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16)
+ ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-HSA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16)
; GFX9-HSA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
; GFX9-HSA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>)
; GFX9-HSA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>)
@@ -9810,11 +9363,8 @@ body: |
; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
- ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>)
- ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-MESA-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-MESA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16)
+ ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-MESA-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16)
; GFX9-MESA-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
; GFX9-MESA-NEXT: $vgpr1 = COPY [[BUILD_VECTOR1]](<2 x s16>)
; GFX9-MESA-NEXT: $vgpr2 = COPY [[BUILD_VECTOR2]](<2 x s16>)
@@ -11471,66 +11021,42 @@ body: |
; SI-NEXT: {{ $}}
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1)
- ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64)
- ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+ ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
;
; CI-HSA-LABEL: name: test_load_global_v3s64_align32
; CI-HSA: liveins: $vgpr0_vgpr1
; CI-HSA-NEXT: {{ $}}
; CI-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1)
- ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>)
- ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; CI-HSA-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64)
- ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+ ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
;
; CI-MESA-LABEL: name: test_load_global_v3s64_align32
; CI-MESA: liveins: $vgpr0_vgpr1
; CI-MESA-NEXT: {{ $}}
; CI-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1)
- ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>)
- ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; CI-MESA-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64)
- ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+ ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
;
; VI-LABEL: name: test_load_global_v3s64_align32
; VI: liveins: $vgpr0_vgpr1
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1)
- ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64)
- ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+ ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
;
; GFX9-HSA-LABEL: name: test_load_global_v3s64_align32
; GFX9-HSA: liveins: $vgpr0_vgpr1
; GFX9-HSA-NEXT: {{ $}}
; GFX9-HSA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1)
- ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>)
- ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX9-HSA-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64)
- ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+ ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
;
; GFX9-MESA-LABEL: name: test_load_global_v3s64_align32
; GFX9-MESA: liveins: $vgpr0_vgpr1
; GFX9-MESA-NEXT: {{ $}}
; GFX9-MESA-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1)
- ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>)
- ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX9-MESA-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64)
- ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+ ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<4 x s64>)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 1)
%2:_(<4 x s64>) = G_IMPLICIT_DEF
@@ -11553,9 +11079,8 @@ body: |
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1)
; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; SI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; CI-HSA-LABEL: name: test_load_global_v3s64_align8
@@ -11567,9 +11092,8 @@ body: |
; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1)
; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; CI-HSA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; CI-MESA-LABEL: name: test_load_global_v3s64_align8
@@ -11581,9 +11105,8 @@ body: |
; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1)
; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; CI-MESA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; VI-LABEL: name: test_load_global_v3s64_align8
@@ -11595,9 +11118,8 @@ body: |
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1)
; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; VI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; GFX9-HSA-LABEL: name: test_load_global_v3s64_align8
@@ -11609,9 +11131,8 @@ body: |
; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1)
; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX9-HSA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; GFX9-MESA-LABEL: name: test_load_global_v3s64_align8
@@ -11623,9 +11144,8 @@ body: |
; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1)
; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX9-MESA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 8, addrspace 1)
@@ -11749,9 +11269,8 @@ body: |
; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
; SI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32)
; SI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]]
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64)
+ ; SI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[DEF]](s64)
; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; CI-HSA-LABEL: name: test_load_global_v3s64_align1
@@ -11763,9 +11282,8 @@ body: |
; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, align 1, addrspace 1)
; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; CI-HSA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; CI-MESA-LABEL: name: test_load_global_v3s64_align1
@@ -11877,9 +11395,8 @@ body: |
; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
; CI-MESA-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32)
; CI-MESA-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]]
- ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64)
+ ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[DEF]](s64)
; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; VI-LABEL: name: test_load_global_v3s64_align1
@@ -11991,9 +11508,8 @@ body: |
; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
; VI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32)
; VI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]]
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64)
+ ; VI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[DEF]](s64)
; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; GFX9-HSA-LABEL: name: test_load_global_v3s64_align1
@@ -12005,9 +11521,8 @@ body: |
; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, align 1, addrspace 1)
; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX9-HSA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; GFX9-MESA-LABEL: name: test_load_global_v3s64_align1
@@ -12119,9 +11634,8 @@ body: |
; GFX9-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
; GFX9-MESA-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32)
; GFX9-MESA-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]]
- ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64)
+ ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[DEF]](s64)
; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 1, addrspace 1)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
index ff43b07..a38d054 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
@@ -8972,24 +8972,12 @@ body: |
; SI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
- ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]]
- ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BITCAST]](<2 x s16>)
; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; CI-LABEL: name: test_load_local_v3s16_align8
@@ -8998,24 +8986,12 @@ body: |
; CI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
- ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
- ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]]
- ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+ ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BITCAST]](<2 x s16>)
; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; CI-DS128-LABEL: name: test_load_local_v3s16_align8
@@ -9024,24 +9000,12 @@ body: |
; CI-DS128-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
; CI-DS128-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
- ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-DS128-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; CI-DS128-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; CI-DS128-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-DS128-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
- ; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]]
- ; CI-DS128-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+ ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BITCAST]](<2 x s16>)
; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; VI-LABEL: name: test_load_local_v3s16_align8
@@ -9050,24 +9014,12 @@ body: |
; VI-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
- ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]]
- ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BITCAST]](<2 x s16>)
; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX9-LABEL: name: test_load_local_v3s16_align8
@@ -9076,20 +9028,9 @@ body: |
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s16_align8
@@ -9098,20 +9039,9 @@ body: |
; GFX9-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
; GFX9-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
- ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; GFX9-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; GFX9-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-UNALIGNED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX9-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
+ ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>)
; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX10-LABEL: name: test_load_local_v3s16_align8
@@ -9120,20 +9050,9 @@ body: |
; GFX10-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
- ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
+ ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>)
; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s16_align8
@@ -9142,20 +9061,9 @@ body: |
; GFX10-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
; GFX10-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
- ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX10-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX10-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX10-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; GFX10-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; GFX10-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX10-UNALIGNED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX10-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
+ ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>)
; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX11-LABEL: name: test_load_local_v3s16_align8
@@ -9164,20 +9072,9 @@ body: |
; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
; GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
- ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX11-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
+ ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>)
; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX11-UNALIGNED-LABEL: name: test_load_local_v3s16_align8
@@ -9186,20 +9083,9 @@ body: |
; GFX11-UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX11-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3)
; GFX11-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
- ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX11-UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX11-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX11-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX11-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX11-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; GFX11-UNALIGNED-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX11-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; GFX11-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX11-UNALIGNED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX11-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX11-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
+ ; GFX11-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX11-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>)
; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(p3) = COPY $vgpr0
%1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 8, addrspace 3)
@@ -9225,28 +9111,22 @@ body: |
; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32)
; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
- ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]]
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
+ ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
- ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]]
+ ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
- ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
- ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]]
- ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+ ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]]
+ ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; CI-LABEL: name: test_load_local_v3s16_align2
@@ -9260,28 +9140,22 @@ body: |
; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32)
; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
- ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
- ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
- ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]]
+ ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
+ ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
- ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
- ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]]
+ ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
- ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
- ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]]
- ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+ ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]]
+ ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; CI-DS128-LABEL: name: test_load_local_v3s16_align2
@@ -9295,28 +9169,22 @@ body: |
; CI-DS128-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; CI-DS128-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32)
; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
- ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-DS128-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-DS128-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
- ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
- ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; CI-DS128-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]]
+ ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
+ ; CI-DS128-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-DS128-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
; CI-DS128-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-DS128-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
- ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
- ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]]
+ ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-DS128-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
; CI-DS128-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-DS128-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-DS128-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
- ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
- ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]]
- ; CI-DS128-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+ ; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]]
+ ; CI-DS128-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; VI-LABEL: name: test_load_local_v3s16_align2
@@ -9330,28 +9198,22 @@ body: |
; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32)
; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
- ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]]
+ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
+ ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
- ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]]
+ ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
- ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]]
- ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]]
+ ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX9-LABEL: name: test_load_local_v3s16_align2
@@ -9368,18 +9230,10 @@ body: |
; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32)
; GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -9397,18 +9251,10 @@ body: |
; GFX9-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32)
; GFX9-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
; GFX9-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX9-UNALIGNED-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-UNALIGNED-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX9-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -9426,18 +9272,10 @@ body: |
; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32)
; GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -9455,18 +9293,10 @@ body: |
; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32)
; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
; GFX10-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX10-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX10-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX10-UNALIGNED-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX10-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX10-UNALIGNED-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -9484,18 +9314,10 @@ body: |
; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32)
; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -9513,18 +9335,10 @@ body: |
; GFX11-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32)
; GFX11-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, addrspace 3)
; GFX11-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX11-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX11-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX11-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX11-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX11-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX11-UNALIGNED-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX11-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX11-UNALIGNED-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX11-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX11-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(p3) = COPY $vgpr0
@@ -9565,28 +9379,22 @@ body: |
; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]]
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]]
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]]
- ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C4]]
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C4]]
+ ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL3]]
- ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]]
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
- ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C4]]
+ ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]]
- ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
- ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32)
- ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]]
- ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
+ ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C6]], [[SHL5]]
+ ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; CI-LABEL: name: test_load_local_v3s16_align1
@@ -9614,28 +9422,22 @@ body: |
; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]]
- ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]]
- ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]]
- ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C4]]
+ ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C4]]
+ ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL3]]
- ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]]
- ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
- ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C4]]
+ ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]]
- ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
- ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32)
- ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]]
- ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
+ ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C6]], [[SHL5]]
+ ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; CI-DS128-LABEL: name: test_load_local_v3s16_align1
@@ -9663,28 +9465,22 @@ body: |
; CI-DS128-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
; CI-DS128-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
; CI-DS128-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]]
- ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-DS128-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-DS128-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]]
- ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]]
- ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; CI-DS128-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-DS128-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C4]]
+ ; CI-DS128-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C4]]
+ ; CI-DS128-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-DS128-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; CI-DS128-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL3]]
- ; CI-DS128-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]]
- ; CI-DS128-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
- ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CI-DS128-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C4]]
+ ; CI-DS128-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-DS128-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
; CI-DS128-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]]
- ; CI-DS128-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; CI-DS128-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
- ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32)
- ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]]
- ; CI-DS128-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; CI-DS128-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
+ ; CI-DS128-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C6]], [[SHL5]]
+ ; CI-DS128-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; CI-DS128-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; VI-LABEL: name: test_load_local_v3s16_align1
@@ -9712,28 +9508,22 @@ body: |
; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load (s8) from unknown-address + 5, addrspace 3)
; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]]
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]]
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]]
- ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C4]]
+ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C4]]
+ ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL3]]
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]]
- ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
- ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C4]]
+ ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]]
- ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
- ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32)
- ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]]
- ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
+ ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C6]], [[SHL5]]
+ ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX9-LABEL: name: test_load_local_v3s16_align1
@@ -9764,18 +9554,10 @@ body: |
; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]]
; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32)
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -9793,18 +9575,10 @@ body: |
; GFX9-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32)
; GFX9-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, align 1, addrspace 3)
; GFX9-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX9-UNALIGNED-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-UNALIGNED-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX9-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -9836,18 +9610,10 @@ body: |
; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
; GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]]
; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32)
- ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -9865,18 +9631,10 @@ body: |
; GFX10-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32)
; GFX10-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, align 1, addrspace 3)
; GFX10-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX10-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX10-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX10-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX10-UNALIGNED-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX10-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX10-UNALIGNED-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX10-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -9908,18 +9666,10 @@ body: |
; GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
; GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]]
; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32)
- ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -9937,18 +9687,10 @@ body: |
; GFX11-UNALIGNED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32)
; GFX11-UNALIGNED-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load (s16) from unknown-address + 4, align 1, addrspace 3)
; GFX11-UNALIGNED-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX11-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX11-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX11-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX11-UNALIGNED-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX11-UNALIGNED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX11-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX11-UNALIGNED-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX11-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX11-UNALIGNED-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX11-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX11-UNALIGNED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(p3) = COPY $vgpr0
@@ -14156,9 +13898,8 @@ body: |
; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32)
; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64), [[UV3]](s64)
+ ; SI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64), [[DEF]](s64)
; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; CI-LABEL: name: test_load_local_v3s64_align32
@@ -14172,9 +13913,8 @@ body: |
; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32)
; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3)
- ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64), [[UV3]](s64)
+ ; CI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64), [[DEF]](s64)
; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; CI-DS128-LABEL: name: test_load_local_v3s64_align32
@@ -14186,9 +13926,8 @@ body: |
; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s32)
; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3)
; CI-DS128-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; CI-DS128-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; VI-LABEL: name: test_load_local_v3s64_align32
@@ -14200,9 +13939,8 @@ body: |
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s32)
; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3)
; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; VI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; GFX9-LABEL: name: test_load_local_v3s64_align32
@@ -14214,9 +13952,8 @@ body: |
; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s32)
; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3)
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s64_align32
@@ -14228,9 +13965,8 @@ body: |
; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s32)
; GFX9-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3)
; GFX9-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX9-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; GFX10-LABEL: name: test_load_local_v3s64_align32
@@ -14242,9 +13978,8 @@ body: |
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s32)
; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3)
; GFX10-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s64_align32
@@ -14256,9 +13991,8 @@ body: |
; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s32)
; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3)
; GFX10-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX10-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; GFX11-LABEL: name: test_load_local_v3s64_align32
@@ -14270,9 +14004,8 @@ body: |
; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s32)
; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3)
; GFX11-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX11-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; GFX11-UNALIGNED-LABEL: name: test_load_local_v3s64_align32
@@ -14284,9 +14017,8 @@ body: |
; GFX11-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s32)
; GFX11-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3)
; GFX11-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; GFX11-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX11-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; GFX11-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; GFX11-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
; GFX11-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
%0:_(p3) = COPY $vgpr0
%1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 3)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir
index 7498def..6f8aaab 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir
@@ -9605,26 +9605,20 @@ body: |
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>)
; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]]
; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32)
; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]]
- ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+ ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]]
- ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]]
- ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
- ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]]
- ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
+ ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
+ ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C3]], [[SHL2]]
+ ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; CI-LABEL: name: test_load_private_v3s16_align8
@@ -9638,26 +9632,20 @@ body: |
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>)
; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
- ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]]
; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32)
; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
- ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]]
- ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+ ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]]
- ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]]
- ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
- ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]]
- ; CI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
+ ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
+ ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C3]], [[SHL2]]
+ ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; VI-LABEL: name: test_load_private_v3s16_align8
@@ -9671,26 +9659,20 @@ body: |
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LOAD]](<2 x s16>)
; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]]
; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32)
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]]
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+ ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL1]]
- ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
- ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]]
- ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32)
+ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C3]], [[SHL2]]
+ ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX9-LABEL: name: test_load_private_v3s16_align8
@@ -9707,17 +9689,10 @@ body: |
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC3]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -9735,17 +9710,10 @@ body: |
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
+ ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16)
- ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC3]](s16)
- ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
+ ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -9755,20 +9723,9 @@ body: |
; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
; GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), addrspace 5)
; GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
- ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX11-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
+ ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>)
; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX12-LABEL: name: test_load_private_v3s16_align8
@@ -9777,20 +9734,9 @@ body: |
; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), addrspace 5)
; GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
- ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX12-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
+ ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>)
; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; UNALIGNED_GFX9-LABEL: name: test_load_private_v3s16_align8
@@ -9807,17 +9753,10 @@ body: |
; UNALIGNED_GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; UNALIGNED_GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
; UNALIGNED_GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; UNALIGNED_GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; UNALIGNED_GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; UNALIGNED_GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; UNALIGNED_GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; UNALIGNED_GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; UNALIGNED_GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; UNALIGNED_GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
+ ; UNALIGNED_GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16)
- ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC3]](s16)
- ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
+ ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; UNALIGNED_GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -9835,17 +9774,10 @@ body: |
; UNALIGNED_GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; UNALIGNED_GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
; UNALIGNED_GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; UNALIGNED_GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; UNALIGNED_GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; UNALIGNED_GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; UNALIGNED_GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; UNALIGNED_GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; UNALIGNED_GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; UNALIGNED_GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
+ ; UNALIGNED_GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16)
- ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC3]](s16)
- ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
+ ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; UNALIGNED_GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -9855,20 +9787,9 @@ body: |
; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), addrspace 5)
; UNALIGNED_GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
- ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; UNALIGNED_GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNALIGNED_GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX11-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; UNALIGNED_GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; UNALIGNED_GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; UNALIGNED_GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; UNALIGNED_GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; UNALIGNED_GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; UNALIGNED_GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; UNALIGNED_GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
+ ; UNALIGNED_GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; UNALIGNED_GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>)
; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; UNALIGNED_GFX12-LABEL: name: test_load_private_v3s16_align8
@@ -9877,20 +9798,9 @@ body: |
; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), addrspace 5)
; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>)
- ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX12-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; UNALIGNED_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; UNALIGNED_GFX12-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
+ ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>)
; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(p5) = COPY $vgpr0
%1:_(<3 x s16>) = G_LOAD %0 :: (load (<3 x s16>), align 8, addrspace 5)
@@ -9916,28 +9826,22 @@ body: |
; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32)
; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
- ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]]
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
+ ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
- ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]]
+ ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
- ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
- ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]]
- ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+ ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]]
+ ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; CI-LABEL: name: test_load_private_v3s16_align2
@@ -9951,28 +9855,22 @@ body: |
; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32)
; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
- ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
- ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
- ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]]
+ ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
+ ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
- ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
- ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]]
+ ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
- ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
- ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]]
- ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+ ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]]
+ ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; VI-LABEL: name: test_load_private_v3s16_align2
@@ -9986,28 +9884,22 @@ body: |
; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32)
; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C3]]
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C3]]
- ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
+ ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C2]]
+ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C2]]
+ ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C3]]
- ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C2]]
+ ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C3]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C2]](s32)
- ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL2]]
- ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C3]](s32)
+ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL2]]
+ ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX9-LABEL: name: test_load_private_v3s16_align2
@@ -10024,17 +9916,10 @@ body: |
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC3]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -10052,17 +9937,10 @@ body: |
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
+ ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16)
- ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC3]](s16)
- ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
+ ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -10080,18 +9958,10 @@ body: |
; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32)
; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -10109,18 +9979,10 @@ body: |
; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32)
; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -10138,18 +10000,10 @@ body: |
; UNALIGNED_GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32)
; UNALIGNED_GFX9-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
; UNALIGNED_GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; UNALIGNED_GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; UNALIGNED_GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; UNALIGNED_GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNALIGNED_GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; UNALIGNED_GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; UNALIGNED_GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; UNALIGNED_GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; UNALIGNED_GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; UNALIGNED_GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -10167,18 +10021,10 @@ body: |
; UNALIGNED_GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32)
; UNALIGNED_GFX10-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
; UNALIGNED_GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; UNALIGNED_GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; UNALIGNED_GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; UNALIGNED_GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNALIGNED_GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; UNALIGNED_GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; UNALIGNED_GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; UNALIGNED_GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; UNALIGNED_GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; UNALIGNED_GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -10196,18 +10042,10 @@ body: |
; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32)
; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
; UNALIGNED_GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; UNALIGNED_GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; UNALIGNED_GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNALIGNED_GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; UNALIGNED_GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; UNALIGNED_GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; UNALIGNED_GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; UNALIGNED_GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; UNALIGNED_GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -10225,18 +10063,10 @@ body: |
; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32)
; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5)
; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; UNALIGNED_GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; UNALIGNED_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; UNALIGNED_GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(p5) = COPY $vgpr0
@@ -10277,28 +10107,22 @@ body: |
; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]]
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]]
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]]
- ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C4]]
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C4]]
+ ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL3]]
- ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]]
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
- ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C4]]
+ ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]]
- ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
- ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32)
- ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]]
- ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
+ ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C6]], [[SHL5]]
+ ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; CI-LABEL: name: test_load_private_v3s16_align1
@@ -10326,28 +10150,22 @@ body: |
; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]]
- ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]]
- ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]]
- ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C4]]
+ ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C4]]
+ ; CI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL3]]
- ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]]
- ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
- ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C4]]
+ ; CI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
; CI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]]
- ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
- ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32)
- ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]]
- ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; CI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
+ ; CI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C6]], [[SHL5]]
+ ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
; CI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; VI-LABEL: name: test_load_private_v3s16_align1
@@ -10375,28 +10193,22 @@ body: |
; VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5)
; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]]
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C5]]
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C5]]
- ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C4]]
+ ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C4]]
+ ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL3]]
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C5]]
- ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]]
- ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C4]]
+ ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]]
- ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]]
- ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32)
- ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL5]]
- ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C5]](s32)
+ ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C6]], [[SHL5]]
+ ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX9-LABEL: name: test_load_private_v3s16_align1
@@ -10413,17 +10225,10 @@ body: |
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC3]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -10441,17 +10246,10 @@ body: |
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
+ ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16)
- ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC3]](s16)
- ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
+ ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -10469,18 +10267,10 @@ body: |
; GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32)
; GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, align 1, addrspace 5)
; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -10498,18 +10288,10 @@ body: |
; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s32)
; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, align 1, addrspace 5)
; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
- ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -10541,18 +10323,10 @@ body: |
; UNALIGNED_GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
; UNALIGNED_GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]]
; UNALIGNED_GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32)
- ; UNALIGNED_GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; UNALIGNED_GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; UNALIGNED_GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; UNALIGNED_GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNALIGNED_GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; UNALIGNED_GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; UNALIGNED_GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; UNALIGNED_GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; UNALIGNED_GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; UNALIGNED_GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -10584,18 +10358,10 @@ body: |
; UNALIGNED_GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
; UNALIGNED_GFX10-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]]
; UNALIGNED_GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32)
- ; UNALIGNED_GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; UNALIGNED_GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; UNALIGNED_GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; UNALIGNED_GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNALIGNED_GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; UNALIGNED_GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; UNALIGNED_GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; UNALIGNED_GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; UNALIGNED_GFX10-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; UNALIGNED_GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -10627,18 +10393,10 @@ body: |
; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]]
; UNALIGNED_GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32)
- ; UNALIGNED_GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; UNALIGNED_GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNALIGNED_GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; UNALIGNED_GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; UNALIGNED_GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; UNALIGNED_GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; UNALIGNED_GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; UNALIGNED_GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
@@ -10670,18 +10428,10 @@ body: |
; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32)
; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]]
; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32)
- ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32)
- ; UNALIGNED_GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; UNALIGNED_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; UNALIGNED_GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
- ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
+ ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(p5) = COPY $vgpr0
@@ -15927,9 +15677,8 @@ body: |
; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
; SI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64)
+ ; SI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[DEF]](s64)
; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; CI-LABEL: name: test_load_private_v3s64_align32
@@ -15953,9 +15702,8 @@ body: |
; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
; CI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
- ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64)
+ ; CI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[DEF]](s64)
; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; VI-LABEL: name: test_load_private_v3s64_align32
@@ -15979,9 +15727,8 @@ body: |
; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
; VI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64)
+ ; VI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[DEF]](s64)
; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; GFX9-LABEL: name: test_load_private_v3s64_align32
@@ -16005,9 +15752,8 @@ body: |
; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[DEF]](s64)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; GFX10-LABEL: name: test_load_private_v3s64_align32
@@ -16031,9 +15777,8 @@ body: |
; GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
; GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
- ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64)
+ ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[DEF]](s64)
; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; GFX11-LABEL: name: test_load_private_v3s64_align32
@@ -16045,9 +15790,8 @@ body: |
; GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s32)
; GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p5) :: (load (s64) from unknown-address + 16, align 16, addrspace 5)
; GFX11-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX11-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; GFX12-LABEL: name: test_load_private_v3s64_align32
@@ -16059,9 +15803,8 @@ body: |
; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s32)
; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p5) :: (load (s64) from unknown-address + 16, align 16, addrspace 5)
; GFX12-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX12-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; UNALIGNED_GFX9-LABEL: name: test_load_private_v3s64_align32
@@ -16085,9 +15828,8 @@ body: |
; UNALIGNED_GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
; UNALIGNED_GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
; UNALIGNED_GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
- ; UNALIGNED_GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64)
+ ; UNALIGNED_GFX9-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; UNALIGNED_GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[DEF]](s64)
; UNALIGNED_GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; UNALIGNED_GFX10-LABEL: name: test_load_private_v3s64_align32
@@ -16111,9 +15853,8 @@ body: |
; UNALIGNED_GFX10-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
; UNALIGNED_GFX10-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5)
; UNALIGNED_GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
- ; UNALIGNED_GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX10-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64)
+ ; UNALIGNED_GFX10-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; UNALIGNED_GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[DEF]](s64)
; UNALIGNED_GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; UNALIGNED_GFX11-LABEL: name: test_load_private_v3s64_align32
@@ -16125,9 +15866,8 @@ body: |
; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s32)
; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p5) :: (load (s64) from unknown-address + 16, align 16, addrspace 5)
; UNALIGNED_GFX11-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; UNALIGNED_GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX11-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; UNALIGNED_GFX11-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; UNALIGNED_GFX12-LABEL: name: test_load_private_v3s64_align32
@@ -16139,9 +15879,8 @@ body: |
; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s32)
; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p5) :: (load (s64) from unknown-address + 16, align 16, addrspace 5)
; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
- ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; UNALIGNED_GFX12-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64)
+ ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[DEF]](s64)
; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
%0:_(p5) = COPY $vgpr0
%1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 5)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir
index 7029cad..4546f30 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir
@@ -509,9 +509,8 @@ body: |
; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[UV4]](s32)
; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[UV5]](s32)
; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[UV6]](s32)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64), [[LSHR2]](s64), [[UV10]](s64)
+ ; SI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64), [[LSHR2]](s64), [[DEF]](s64)
; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; VI-LABEL: name: test_lshr_v3s64_v3s32
@@ -524,9 +523,8 @@ body: |
; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[UV4]](s32)
; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[UV5]](s32)
; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[UV6]](s32)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64), [[LSHR2]](s64), [[UV10]](s64)
+ ; VI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64), [[LSHR2]](s64), [[DEF]](s64)
; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; GFX9-LABEL: name: test_lshr_v3s64_v3s32
@@ -539,9 +537,8 @@ body: |
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[UV4]](s32)
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[UV5]](s32)
; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[UV6]](s32)
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64), [[LSHR2]](s64), [[UV10]](s64)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64), [[LSHR2]](s64), [[DEF]](s64)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
%0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
%1:_(<3 x s64>) = G_EXTRACT %0, 0
@@ -715,20 +712,17 @@ body: |
; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
- ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]]
; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]]
- ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]]
- ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR5]], [[C]](s32)
+ ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL1]]
- ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+ ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
;
; VI-LABEL: name: test_lshr_v3s16_v3s16
@@ -754,20 +748,17 @@ body: |
; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC3]](s16)
; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[TRUNC4]](s16)
; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[TRUNC5]](s16)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
- ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR2]](s16)
; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR3]](s16)
; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR4]](s16)
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR5]], [[C]](s32)
+ ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
;
; GFX9-LABEL: name: test_lshr_v3s16_v3s16
@@ -790,13 +781,9 @@ body: |
; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[LSHR1]](s16), [[TRUNC4]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[LSHR1]](s16), [[DEF]](s16)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
@@ -836,26 +823,20 @@ body: |
; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]]
; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]]
- ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]]
- ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
- ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
+ ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL1]]
- ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
- ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32)
- ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR5]], [[SHL2]]
- ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
+ ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL2]]
+ ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; VI-LABEL: name: test_ashr_v3s16_v3s16
@@ -881,27 +862,20 @@ body: |
; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC3]](s16)
; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[TRUNC4]](s16)
; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[TRUNC5]](s16)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR2]](s16)
; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR3]](s16)
; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR4]](s16)
- ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
- ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
- ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR5]], [[SHL2]]
- ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
+ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL2]]
+ ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX9-LABEL: name: test_ashr_v3s16_v3s16
@@ -924,17 +898,10 @@ body: |
; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
- ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
- ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[LSHR1]](s16), [[TRUNC4]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[TRUNC6]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[LSHR1]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir
index ef7759f..c9a3060 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir
@@ -376,22 +376,17 @@ body: |
bb.0:
; CHECK-LABEL: name: test_or_v5s32
- ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>)
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32)
- ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<5 x s32>)
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR]], [[BUILD_VECTOR2]]
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR1]], [[BUILD_VECTOR3]]
- ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[UV4]], [[UV9]]
- ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[OR]](<2 x s32>)
- ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[OR1]](<2 x s32>)
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF2]](<8 x s32>)
- ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[OR2]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32)
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[OR]](<2 x s32>)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[OR1]](<2 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[OR2]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR4]](<8 x s32>)
%0:_(<5 x s32>) = G_IMPLICIT_DEF
%1:_(<5 x s32>) = G_IMPLICIT_DEF
@@ -459,24 +454,12 @@ body: |
; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV3]](<2 x s16>), [[UV4]](<2 x s16>)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
- ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]]
- ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV6]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV6]](<2 x s16>), [[UV7]](<2 x s16>), [[BITCAST]](<2 x s16>)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
@@ -514,63 +497,62 @@ body: |
bb.0:
; CHECK-LABEL: name: test_or_v5s16
- ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<6 x s16>)
- ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>)
- ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV3]](<2 x s16>), [[UV4]](<2 x s16>)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
- ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]]
- ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
- ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
- ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL2]]
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL3]]
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
+ ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(<4 x s16>) = G_OR [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR4]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32)
+ ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL4]]
+ ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
+ ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL5]]
+ ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]]
- ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
- ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL3]]
- ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL6]]
+ ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
+ ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL7]]
+ ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>)
- ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(<4 x s16>) = G_OR [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]]
- ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR5]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32)
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>)
- ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST11]], [[C]](s32)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
- ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32)
- ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]]
- ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]]
- ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
- ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL5]]
- ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
- ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR4]], [[C]](s32)
- ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL6]]
- ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>), [[UV13]](<2 x s16>)
+ ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(<4 x s16>) = G_OR [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]]
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR9]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C1]](s32)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]]
+ ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL8]]
+ ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C2]]
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C2]]
+ ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL9]]
+ ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32)
+ ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL10]]
+ ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR12]](s32)
+ ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL11]]
+ ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<8 x s16>)
%0:_(<5 x s16>) = G_IMPLICIT_DEF
%1:_(<5 x s16>) = G_IMPLICIT_DEF
@@ -586,13 +568,10 @@ body: |
bb.0:
; CHECK-LABEL: name: test_or_v3s8
- ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<4 x s32>)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV]], [[UV4]]
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV1]], [[UV5]]
- ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[UV2]], [[UV6]]
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[DEF]]
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
%0:_(<3 x s8>) = G_IMPLICIT_DEF
@@ -608,14 +587,11 @@ body: |
bb.0:
; CHECK-LABEL: name: test_or_v4s8
- ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
- ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<4 x s32>)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV]], [[UV4]]
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV1]], [[UV5]]
- ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[UV2]], [[UV6]]
- ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UV7]]
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[DEF]], [[DEF]]
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
%0:_(<4 x s8>) = G_IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir
index f1c4994..12e2cb9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir
@@ -165,24 +165,12 @@ body: |
; CHECK-NEXT: bb.2:
; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<4 x s16>) = G_PHI [[CONCAT_VECTORS]](<4 x s16>), %bb.0, [[CONCAT_VECTORS1]](<4 x s16>), %bb.1
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[PHI]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C6]](s32)
- ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
- ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C7]]
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C7]]
- ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32)
- ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
- ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
- ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C7]]
- ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32)
- ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL4]]
- ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV2]](<2 x s16>), [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>)
+ ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C6]], [[C7]](s32)
+ ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[C6]], [[SHL3]]
+ ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV2]](<2 x s16>), [[UV3]](<2 x s16>), [[BITCAST5]](<2 x s16>)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31
bb.0:
@@ -522,7 +510,8 @@ body: |
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr4
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
@@ -532,29 +521,27 @@ body: |
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
- ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
- ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV16]]
- ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV17]]
- ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV18]]
- ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[UV19]]
- ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV4]], [[UV20]]
- ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[UV21]]
- ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV6]], [[UV22]]
- ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[UV23]]
- ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UV8]], [[UV24]]
- ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[UV25]]
- ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[UV10]], [[UV26]]
- ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UV11]], [[UV27]]
- ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV12]], [[UV28]]
- ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[UV29]]
- ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UV14]], [[UV30]]
- ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[UV31]]
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32), [[ADD8]](s32), [[ADD9]](s32), [[ADD10]](s32), [[ADD11]](s32), [[ADD12]](s32), [[ADD13]](s32), [[ADD14]](s32), [[ADD15]](s32)
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
- ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[BUILD_VECTOR]](<16 x s32>), %bb.1
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF1]](<16 x s32>), %bb.0, [[BUILD_VECTOR]](<16 x s32>), %bb.1
; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[PHI]](<16 x s32>)
bb.0:
successors: %bb.1, %bb.2
@@ -589,7 +576,8 @@ body: |
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr4
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
@@ -599,45 +587,43 @@ body: |
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>)
- ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32), [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>)
- ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV32]]
- ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV33]]
- ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV34]]
- ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[UV35]]
- ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV4]], [[UV36]]
- ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[UV37]]
- ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV6]], [[UV38]]
- ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[UV39]]
- ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UV8]], [[UV40]]
- ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[UV41]]
- ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[UV10]], [[UV42]]
- ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UV11]], [[UV43]]
- ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV12]], [[UV44]]
- ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[UV45]]
- ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UV14]], [[UV46]]
- ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[UV47]]
- ; CHECK-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV16]], [[UV48]]
- ; CHECK-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[UV49]]
- ; CHECK-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV18]], [[UV50]]
- ; CHECK-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UV19]], [[UV51]]
- ; CHECK-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[UV20]], [[UV52]]
- ; CHECK-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[UV53]]
- ; CHECK-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[UV22]], [[UV54]]
- ; CHECK-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UV23]], [[UV55]]
- ; CHECK-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UV24]], [[UV56]]
- ; CHECK-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[UV57]]
- ; CHECK-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[UV26]], [[UV58]]
- ; CHECK-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[UV27]], [[UV59]]
- ; CHECK-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[UV28]], [[UV60]]
- ; CHECK-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UV29]], [[UV61]]
- ; CHECK-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[UV30]], [[UV62]]
- ; CHECK-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UV31]], [[UV63]]
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32), [[ADD8]](s32), [[ADD9]](s32), [[ADD10]](s32), [[ADD11]](s32), [[ADD12]](s32), [[ADD13]](s32), [[ADD14]](s32), [[ADD15]](s32), [[ADD16]](s32), [[ADD17]](s32), [[ADD18]](s32), [[ADD19]](s32), [[ADD20]](s32), [[ADD21]](s32), [[ADD22]](s32), [[ADD23]](s32), [[ADD24]](s32), [[ADD25]](s32), [[ADD26]](s32), [[ADD27]](s32), [[ADD28]](s32), [[ADD29]](s32), [[ADD30]](s32), [[ADD31]](s32)
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
- ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<32 x s32>) = G_PHI [[DEF]](<32 x s32>), %bb.0, [[BUILD_VECTOR]](<32 x s32>), %bb.1
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<32 x s32>) = G_PHI [[DEF1]](<32 x s32>), %bb.0, [[BUILD_VECTOR]](<32 x s32>), %bb.1
; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[PHI]](<32 x s32>)
bb.0:
successors: %bb.1, %bb.2
@@ -672,86 +658,81 @@ body: |
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr4
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>)
- ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<16 x s32>), [[UV3:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>)
; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>)
- ; CHECK-NEXT: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32), [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32), [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32), [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>)
- ; CHECK-NEXT: [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32), [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32), [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32), [[UV74:%[0-9]+]]:_(s32), [[UV75:%[0-9]+]]:_(s32), [[UV76:%[0-9]+]]:_(s32), [[UV77:%[0-9]+]]:_(s32), [[UV78:%[0-9]+]]:_(s32), [[UV79:%[0-9]+]]:_(s32), [[UV80:%[0-9]+]]:_(s32), [[UV81:%[0-9]+]]:_(s32), [[UV82:%[0-9]+]]:_(s32), [[UV83:%[0-9]+]]:_(s32), [[UV84:%[0-9]+]]:_(s32), [[UV85:%[0-9]+]]:_(s32), [[UV86:%[0-9]+]]:_(s32), [[UV87:%[0-9]+]]:_(s32), [[UV88:%[0-9]+]]:_(s32), [[UV89:%[0-9]+]]:_(s32), [[UV90:%[0-9]+]]:_(s32), [[UV91:%[0-9]+]]:_(s32), [[UV92:%[0-9]+]]:_(s32), [[UV93:%[0-9]+]]:_(s32), [[UV94:%[0-9]+]]:_(s32), [[UV95:%[0-9]+]]:_(s32), [[UV96:%[0-9]+]]:_(s32), [[UV97:%[0-9]+]]:_(s32), [[UV98:%[0-9]+]]:_(s32), [[UV99:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>)
- ; CHECK-NEXT: [[UV100:%[0-9]+]]:_(s32), [[UV101:%[0-9]+]]:_(s32), [[UV102:%[0-9]+]]:_(s32), [[UV103:%[0-9]+]]:_(s32), [[UV104:%[0-9]+]]:_(s32), [[UV105:%[0-9]+]]:_(s32), [[UV106:%[0-9]+]]:_(s32), [[UV107:%[0-9]+]]:_(s32), [[UV108:%[0-9]+]]:_(s32), [[UV109:%[0-9]+]]:_(s32), [[UV110:%[0-9]+]]:_(s32), [[UV111:%[0-9]+]]:_(s32), [[UV112:%[0-9]+]]:_(s32), [[UV113:%[0-9]+]]:_(s32), [[UV114:%[0-9]+]]:_(s32), [[UV115:%[0-9]+]]:_(s32), [[UV116:%[0-9]+]]:_(s32), [[UV117:%[0-9]+]]:_(s32), [[UV118:%[0-9]+]]:_(s32), [[UV119:%[0-9]+]]:_(s32), [[UV120:%[0-9]+]]:_(s32), [[UV121:%[0-9]+]]:_(s32), [[UV122:%[0-9]+]]:_(s32), [[UV123:%[0-9]+]]:_(s32), [[UV124:%[0-9]+]]:_(s32), [[UV125:%[0-9]+]]:_(s32), [[UV126:%[0-9]+]]:_(s32), [[UV127:%[0-9]+]]:_(s32), [[UV128:%[0-9]+]]:_(s32), [[UV129:%[0-9]+]]:_(s32), [[UV130:%[0-9]+]]:_(s32), [[UV131:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>)
- ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV4]], [[UV68]]
- ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[UV69]]
- ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV6]], [[UV70]]
- ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[UV71]]
- ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV8]], [[UV72]]
- ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[UV73]]
- ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV10]], [[UV74]]
- ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV11]], [[UV75]]
- ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UV12]], [[UV76]]
- ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[UV77]]
- ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[UV14]], [[UV78]]
- ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[UV79]]
- ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV16]], [[UV80]]
- ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[UV81]]
- ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UV18]], [[UV82]]
- ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UV19]], [[UV83]]
- ; CHECK-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV20]], [[UV84]]
- ; CHECK-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[UV85]]
- ; CHECK-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV22]], [[UV86]]
- ; CHECK-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UV23]], [[UV87]]
- ; CHECK-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[UV24]], [[UV88]]
- ; CHECK-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[UV89]]
- ; CHECK-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[UV26]], [[UV90]]
- ; CHECK-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UV27]], [[UV91]]
- ; CHECK-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UV28]], [[UV92]]
- ; CHECK-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[UV29]], [[UV93]]
- ; CHECK-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[UV30]], [[UV94]]
- ; CHECK-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[UV31]], [[UV95]]
- ; CHECK-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[UV32]], [[UV96]]
- ; CHECK-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UV33]], [[UV97]]
- ; CHECK-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[UV34]], [[UV98]]
- ; CHECK-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UV35]], [[UV99]]
- ; CHECK-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[UV36]], [[UV100]]
- ; CHECK-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UV37]], [[UV101]]
- ; CHECK-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[UV38]], [[UV102]]
- ; CHECK-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[UV39]], [[UV103]]
- ; CHECK-NEXT: [[ADD36:%[0-9]+]]:_(s32) = G_ADD [[UV40]], [[UV104]]
- ; CHECK-NEXT: [[ADD37:%[0-9]+]]:_(s32) = G_ADD [[UV41]], [[UV105]]
- ; CHECK-NEXT: [[ADD38:%[0-9]+]]:_(s32) = G_ADD [[UV42]], [[UV106]]
- ; CHECK-NEXT: [[ADD39:%[0-9]+]]:_(s32) = G_ADD [[UV43]], [[UV107]]
- ; CHECK-NEXT: [[ADD40:%[0-9]+]]:_(s32) = G_ADD [[UV44]], [[UV108]]
- ; CHECK-NEXT: [[ADD41:%[0-9]+]]:_(s32) = G_ADD [[UV45]], [[UV109]]
- ; CHECK-NEXT: [[ADD42:%[0-9]+]]:_(s32) = G_ADD [[UV46]], [[UV110]]
- ; CHECK-NEXT: [[ADD43:%[0-9]+]]:_(s32) = G_ADD [[UV47]], [[UV111]]
- ; CHECK-NEXT: [[ADD44:%[0-9]+]]:_(s32) = G_ADD [[UV48]], [[UV112]]
- ; CHECK-NEXT: [[ADD45:%[0-9]+]]:_(s32) = G_ADD [[UV49]], [[UV113]]
- ; CHECK-NEXT: [[ADD46:%[0-9]+]]:_(s32) = G_ADD [[UV50]], [[UV114]]
- ; CHECK-NEXT: [[ADD47:%[0-9]+]]:_(s32) = G_ADD [[UV51]], [[UV115]]
- ; CHECK-NEXT: [[ADD48:%[0-9]+]]:_(s32) = G_ADD [[UV52]], [[UV116]]
- ; CHECK-NEXT: [[ADD49:%[0-9]+]]:_(s32) = G_ADD [[UV53]], [[UV117]]
- ; CHECK-NEXT: [[ADD50:%[0-9]+]]:_(s32) = G_ADD [[UV54]], [[UV118]]
- ; CHECK-NEXT: [[ADD51:%[0-9]+]]:_(s32) = G_ADD [[UV55]], [[UV119]]
- ; CHECK-NEXT: [[ADD52:%[0-9]+]]:_(s32) = G_ADD [[UV56]], [[UV120]]
- ; CHECK-NEXT: [[ADD53:%[0-9]+]]:_(s32) = G_ADD [[UV57]], [[UV121]]
- ; CHECK-NEXT: [[ADD54:%[0-9]+]]:_(s32) = G_ADD [[UV58]], [[UV122]]
- ; CHECK-NEXT: [[ADD55:%[0-9]+]]:_(s32) = G_ADD [[UV59]], [[UV123]]
- ; CHECK-NEXT: [[ADD56:%[0-9]+]]:_(s32) = G_ADD [[UV60]], [[UV124]]
- ; CHECK-NEXT: [[ADD57:%[0-9]+]]:_(s32) = G_ADD [[UV61]], [[UV125]]
- ; CHECK-NEXT: [[ADD58:%[0-9]+]]:_(s32) = G_ADD [[UV62]], [[UV126]]
- ; CHECK-NEXT: [[ADD59:%[0-9]+]]:_(s32) = G_ADD [[UV63]], [[UV127]]
- ; CHECK-NEXT: [[ADD60:%[0-9]+]]:_(s32) = G_ADD [[UV64]], [[UV128]]
- ; CHECK-NEXT: [[ADD61:%[0-9]+]]:_(s32) = G_ADD [[UV65]], [[UV129]]
- ; CHECK-NEXT: [[ADD62:%[0-9]+]]:_(s32) = G_ADD [[UV66]], [[UV130]]
- ; CHECK-NEXT: [[ADD63:%[0-9]+]]:_(s32) = G_ADD [[UV67]], [[UV131]]
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD36:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD37:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD38:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD39:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD40:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD41:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD42:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD43:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD44:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD45:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD46:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD47:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD48:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD49:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD50:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD51:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD52:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD53:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD54:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD55:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD56:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD57:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD58:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD59:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD60:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD61:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD62:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[ADD63:%[0-9]+]]:_(s32) = G_ADD [[DEF]], [[DEF]]
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32), [[ADD8]](s32), [[ADD9]](s32), [[ADD10]](s32), [[ADD11]](s32), [[ADD12]](s32), [[ADD13]](s32), [[ADD14]](s32), [[ADD15]](s32)
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD16]](s32), [[ADD17]](s32), [[ADD18]](s32), [[ADD19]](s32), [[ADD20]](s32), [[ADD21]](s32), [[ADD22]](s32), [[ADD23]](s32), [[ADD24]](s32), [[ADD25]](s32), [[ADD26]](s32), [[ADD27]](s32), [[ADD28]](s32), [[ADD29]](s32), [[ADD30]](s32), [[ADD31]](s32)
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD32]](s32), [[ADD33]](s32), [[ADD34]](s32), [[ADD35]](s32), [[ADD36]](s32), [[ADD37]](s32), [[ADD38]](s32), [[ADD39]](s32), [[ADD40]](s32), [[ADD41]](s32), [[ADD42]](s32), [[ADD43]](s32), [[ADD44]](s32), [[ADD45]](s32), [[ADD46]](s32), [[ADD47]](s32)
@@ -759,10 +740,10 @@ body: |
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
- ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<16 x s32>) = G_PHI [[UV]](<16 x s32>), %bb.0, [[BUILD_VECTOR]](<16 x s32>), %bb.1
- ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(<16 x s32>) = G_PHI [[UV1]](<16 x s32>), %bb.0, [[BUILD_VECTOR1]](<16 x s32>), %bb.1
- ; CHECK-NEXT: [[PHI2:%[0-9]+]]:_(<16 x s32>) = G_PHI [[UV2]](<16 x s32>), %bb.0, [[BUILD_VECTOR2]](<16 x s32>), %bb.1
- ; CHECK-NEXT: [[PHI3:%[0-9]+]]:_(<16 x s32>) = G_PHI [[UV3]](<16 x s32>), %bb.0, [[BUILD_VECTOR3]](<16 x s32>), %bb.1
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF1]](<16 x s32>), %bb.0, [[BUILD_VECTOR]](<16 x s32>), %bb.1
+ ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF1]](<16 x s32>), %bb.0, [[BUILD_VECTOR1]](<16 x s32>), %bb.1
+ ; CHECK-NEXT: [[PHI2:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF1]](<16 x s32>), %bb.0, [[BUILD_VECTOR2]](<16 x s32>), %bb.1
+ ; CHECK-NEXT: [[PHI3:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF1]](<16 x s32>), %bb.0, [[BUILD_VECTOR3]](<16 x s32>), %bb.1
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[PHI]](<16 x s32>), [[PHI1]](<16 x s32>), [[PHI2]](<16 x s32>), [[PHI3]](<16 x s32>)
; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[CONCAT_VECTORS]](<64 x s32>)
bb.0:
@@ -946,10 +927,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<3 x s64>) = G_PHI [[BUILD_VECTOR]](<3 x s64>), %bb.0, [[BUILD_VECTOR1]](<3 x s64>), %bb.1
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s64), [[UV17:%[0-9]+]]:_(s64), [[UV18:%[0-9]+]]:_(s64), [[UV19:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(s64), [[UV21:%[0-9]+]]:_(s64), [[UV22:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[PHI]](<3 x s64>)
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV20]](s64), [[UV21]](s64), [[UV22]](s64), [[UV19]](s64)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s64), [[UV17:%[0-9]+]]:_(s64), [[UV18:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[PHI]](<3 x s64>)
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV16]](s64), [[UV17]](s64), [[UV18]](s64), [[DEF]](s64)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR2]](<4 x s64>)
; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31
bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir
index 16ad07e..07f20dc 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir
@@ -242,32 +242,25 @@ body: |
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1)
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1)
; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR2]](s1)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C2]]
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C2]]
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C2]]
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C2]]
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
- ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR4]], [[SHL2]]
- ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>)
+ ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL2]]
+ ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]]
- ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]]
- ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]]
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND5]](s32), [[AND6]](s32), [[AND7]](s32)
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]]
+ ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]]
+ ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]]
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND3]](s32), [[AND4]](s32), [[AND5]](s32)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir
index 4c0f4e6..832c80f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir
@@ -506,27 +506,19 @@ body: |
; GFX6-NEXT: [[SMIN5:%[0-9]+]]:_(s32) = G_SMIN [[SMAX5]], [[SUB4]]
; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SHL4]], [[SMIN5]]
; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[ADD2]], [[C]](s32)
- ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
- ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C4]]
; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]]
; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]]
- ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C4]]
- ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]]
- ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C]](s32)
; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]]
- ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]]
- ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
- ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL8]]
- ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
+ ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C3]], [[SHL7]]
+ ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX8-LABEL: name: saddsat_v3s16
@@ -571,27 +563,19 @@ body: |
; GFX8-NEXT: [[SMAX5:%[0-9]+]]:_(s16) = G_SMAX [[SUB5]], [[TRUNC5]]
; GFX8-NEXT: [[SMIN5:%[0-9]+]]:_(s16) = G_SMIN [[SMAX5]], [[SUB4]]
; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[SMIN5]]
- ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
- ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16)
; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16)
; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16)
- ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]]
- ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C]](s32)
; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]]
- ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL2]]
- ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
+ ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL1]]
+ ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX9-LABEL: name: saddsat_v3s16
@@ -600,38 +584,28 @@ body: |
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[DEF]](s16)
- ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[UV]], [[BUILD_VECTOR1]]
- ; GFX9-NEXT: [[SADDSAT1:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[BUILD_VECTOR]], [[BUILD_VECTOR2]]
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[UV]], [[BUILD_VECTOR]]
+ ; GFX9-NEXT: [[SADDSAT1:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[UV1]], [[BUILD_VECTOR1]]
; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[SADDSAT]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
+ ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+ ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[SADDSAT1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32)
- ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
- ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[TRUNC9]](s16)
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>)
+ ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir
index 12bcecd..be57687 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir
@@ -404,24 +404,11 @@ body: |
; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV3]](<2 x s16>), [[UV4]](<2 x s16>)
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<4 x s16>) = G_SELECT [[ICMP]](s1), [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[SELECT]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
- ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]]
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]]
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]]
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]]
- ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV6]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV6]](<2 x s16>), [[UV7]](<2 x s16>), [[BITCAST]](<2 x s16>)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
@@ -1003,19 +990,16 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<8 x p0>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<8 x p0>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[COPY]]
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(p0), [[UV1:%[0-9]+]]:_(p0), [[UV2:%[0-9]+]]:_(p0), [[UV3:%[0-9]+]]:_(p0), [[UV4:%[0-9]+]]:_(p0), [[UV5:%[0-9]+]]:_(p0), [[UV6:%[0-9]+]]:_(p0), [[UV7:%[0-9]+]]:_(p0) = G_UNMERGE_VALUES [[DEF]](<8 x p0>)
- ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(p0), [[UV9:%[0-9]+]]:_(p0), [[UV10:%[0-9]+]]:_(p0), [[UV11:%[0-9]+]]:_(p0), [[UV12:%[0-9]+]]:_(p0), [[UV13:%[0-9]+]]:_(p0), [[UV14:%[0-9]+]]:_(p0), [[UV15:%[0-9]+]]:_(p0) = G_UNMERGE_VALUES [[DEF1]](<8 x p0>)
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[UV]], [[UV8]]
- ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV9]]
- ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[UV2]], [[UV10]]
- ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[UV3]], [[UV11]]
- ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[UV4]], [[UV12]]
- ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[UV5]], [[UV13]]
- ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[UV6]], [[UV14]]
- ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[UV7]], [[UV15]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[DEF]], [[DEF]]
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x p0>) = G_BUILD_VECTOR [[SELECT]](p0), [[SELECT1]](p0), [[SELECT2]](p0), [[SELECT3]](p0), [[SELECT4]](p0), [[SELECT5]](p0), [[SELECT6]](p0), [[SELECT7]](p0)
; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<8 x p0>)
%0:_(s32) = G_CONSTANT i32 0
@@ -1345,27 +1329,20 @@ body: |
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC]], [[TRUNC3]]
; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[TRUNC1]], [[TRUNC4]]
; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[TRUNC2]], [[TRUNC5]]
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT]](s16)
; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16)
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT2]](s16)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]]
- ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL2]]
+ ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir
index 2e764da..b307891 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir
@@ -924,37 +924,26 @@ body: |
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
- ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
- ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16)
- ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV3]], [[BUILD_VECTOR1]](<2 x s16>)
- ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL]], [[BUILD_VECTOR1]](<2 x s16>)
- ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16)
- ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR]], [[BUILD_VECTOR2]](<2 x s16>)
- ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL1]], [[BUILD_VECTOR2]](<2 x s16>)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[TRUNC6]](s16)
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16)
+ ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV3]], [[BUILD_VECTOR]](<2 x s16>)
+ ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL]], [[BUILD_VECTOR]](<2 x s16>)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16)
+ ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV1]], [[BUILD_VECTOR1]](<2 x s16>)
+ ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL1]], [[BUILD_VECTOR1]](<2 x s16>)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR]](<2 x s16>)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR1]](<2 x s16>)
+ ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX8-LABEL: name: test_sext_inreg_v3s16_1
@@ -969,27 +958,21 @@ body: |
; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 1
; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 1
; GFX8-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 1
- ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG]], [[C1]]
; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG1]], [[C1]]
; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG2]], [[C1]]
- ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
- ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
- ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]]
- ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+ ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
+ ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL2]]
+ ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX6-LABEL: name: test_sext_inreg_v3s16_1
@@ -1004,27 +987,21 @@ body: |
; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 1
; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 1
; GFX6-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 1
- ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG]], [[C1]]
; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG1]], [[C1]]
; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG2]], [[C1]]
- ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
- ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
- ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]]
- ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+ ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
+ ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL2]]
+ ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0
@@ -1210,89 +1187,74 @@ body: |
bb.0:
; GFX9-LABEL: name: test_sext_inreg_v6s16_1
- ; GFX9: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
+ ; GFX9: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16)
- ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV]], [[BUILD_VECTOR]](<2 x s16>)
+ ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[DEF]], [[BUILD_VECTOR]](<2 x s16>)
; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL]], [[BUILD_VECTOR]](<2 x s16>)
; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16)
- ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV1]], [[BUILD_VECTOR1]](<2 x s16>)
+ ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[DEF]], [[BUILD_VECTOR1]](<2 x s16>)
; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL1]], [[BUILD_VECTOR1]](<2 x s16>)
; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16)
- ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV2]], [[BUILD_VECTOR2]](<2 x s16>)
+ ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(<2 x s16>) = G_SHL [[DEF]], [[BUILD_VECTOR2]](<2 x s16>)
; GFX9-NEXT: [[ASHR2:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL2]], [[BUILD_VECTOR2]](<2 x s16>)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[ASHR]](<2 x s16>), [[ASHR1]](<2 x s16>), [[ASHR2]](<2 x s16>)
; GFX9-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX8-LABEL: name: test_sext_inreg_v6s16_1
- ; GFX8: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 1
- ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 1
- ; GFX8-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 1
- ; GFX8-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 1
- ; GFX8-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST2]], 1
- ; GFX8-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 1
- ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG]], [[C1]]
- ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG1]], [[C1]]
- ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; GFX8: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[DEF]], 1
+ ; GFX8-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[DEF]], 1
+ ; GFX8-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[DEF]], 1
+ ; GFX8-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[DEF]], 1
+ ; GFX8-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[DEF]], 1
+ ; GFX8-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[DEF]], 1
+ ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG]], [[C]]
+ ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG1]], [[C]]
+ ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG2]], [[C1]]
- ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG3]], [[C1]]
- ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG2]], [[C]]
+ ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG3]], [[C]]
+ ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG4]], [[C1]]
- ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG5]], [[C1]]
- ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+ ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG4]], [[C]]
+ ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG5]], [[C]]
+ ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
+ ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
; GFX8-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX6-LABEL: name: test_sext_inreg_v6s16_1
- ; GFX6: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 1
- ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 1
- ; GFX6-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 1
- ; GFX6-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 1
- ; GFX6-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST2]], 1
- ; GFX6-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 1
- ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG]], [[C1]]
- ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG1]], [[C1]]
- ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; GFX6: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[DEF]], 1
+ ; GFX6-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[DEF]], 1
+ ; GFX6-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[DEF]], 1
+ ; GFX6-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[DEF]], 1
+ ; GFX6-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[DEF]], 1
+ ; GFX6-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[DEF]], 1
+ ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG]], [[C]]
+ ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG1]], [[C]]
+ ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG2]], [[C1]]
- ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG3]], [[C1]]
- ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG2]], [[C]]
+ ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG3]], [[C]]
+ ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG4]], [[C1]]
- ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG5]], [[C1]]
- ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+ ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG4]], [[C]]
+ ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[SEXT_INREG5]], [[C]]
+ ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32)
; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
- ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
+ ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>)
; GFX6-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x s16>)
%0:_(<6 x s16>) = G_IMPLICIT_DEF
%1:_(<6 x s16>) = G_SEXT_INREG %0, 1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir
index ba66106..15399ce 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir
@@ -502,9 +502,8 @@ body: |
; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UV4]](s32)
; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV5]](s32)
; SI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[UV6]](s32)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64), [[UV10]](s64)
+ ; SI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64), [[DEF]](s64)
; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; VI-LABEL: name: test_shl_v3s64_v3s32
@@ -517,9 +516,8 @@ body: |
; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UV4]](s32)
; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV5]](s32)
; VI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[UV6]](s32)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64), [[UV10]](s64)
+ ; VI-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64), [[DEF]](s64)
; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
;
; GFX9-LABEL: name: test_shl_v3s64_v3s32
@@ -532,9 +530,8 @@ body: |
; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UV4]](s32)
; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV5]](s32)
; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[UV6]](s32)
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64), [[UV10]](s64)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64), [[DEF]](s64)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
%0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
%1:_(<3 x s64>) = G_EXTRACT %0, 0
@@ -703,26 +700,20 @@ body: |
; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[LSHR1]](s32)
; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[AND1]](s32)
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SHL]], [[C1]]
; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SHL1]], [[C1]]
; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL3]]
- ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C1]]
- ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
- ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
+ ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]]
- ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
- ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32)
- ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL5]]
- ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
+ ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL5]]
+ ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; VI-LABEL: name: test_shl_v3s16_v3s16
@@ -748,27 +739,20 @@ body: |
; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16)
; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC4]](s16)
; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC5]](s16)
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SHL]](s16)
; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SHL1]](s16)
; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
- ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SHL2]](s16)
- ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
- ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
- ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
- ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL5]]
- ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
+ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL5]]
+ ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX9-LABEL: name: test_shl_v3s16_v3s16
@@ -791,17 +775,10 @@ body: |
; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
- ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SHL1]](s16), [[TRUNC4]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[TRUNC6]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SHL1]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir
index 45714fd9..b5ec2ac 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir
@@ -474,52 +474,20 @@ body: |
; SI-LABEL: name: test_smax_v3s16
; SI: liveins: $vgpr0, $vgpr1
; SI-NEXT: {{ $}}
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16
- ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST2]], 16
- ; SI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]]
- ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16
- ; SI-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16
- ; SI-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG2]], [[SEXT_INREG3]]
- ; SI-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16
- ; SI-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST3]], 16
- ; SI-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG4]], [[SEXT_INREG5]]
+ ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[C]], [[C]]
+ ; SI-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[C]], [[C]]
+ ; SI-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[C]], [[C]]
; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32), [[SMAX2]](s32)
; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
;
; VI-LABEL: name: test_smax_v3s16
; VI: liveins: $vgpr0, $vgpr1
; VI-NEXT: {{ $}}
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
- ; VI-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC3]]
- ; VI-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[TRUNC4]]
- ; VI-NEXT: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC2]], [[TRUNC5]]
+ ; VI-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; VI-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[DEF]], [[DEF]]
+ ; VI-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[DEF]], [[DEF]]
+ ; VI-NEXT: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[DEF]], [[DEF]]
; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX]](s16)
; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX1]](s16)
; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX2]](s16)
@@ -529,42 +497,32 @@ body: |
; GFX9-LABEL: name: test_smax_v3s16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+ ; GFX9-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[DEF]], [[DEF]]
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[SMAX]](<2 x s16>)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[UV]], [[UV2]]
- ; GFX9-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC1]]
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[SMAX]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX1]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST2]](s32), [[LSHR]](s32), [[ANYEXT]](s32)
- ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[ANYEXT]](s32)
+ ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR2]](<3 x s32>)
;
; GFX1250-LABEL: name: test_smax_v3s16
; GFX1250: liveins: $vgpr0, $vgpr1
; GFX1250-NEXT: {{ $}}
- ; GFX1250-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX1250-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX1250-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX1250-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+ ; GFX1250-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX1250-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX1250-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX1250-NEXT: [[SMAX:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+ ; GFX1250-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[DEF]], [[DEF]]
+ ; GFX1250-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[SMAX]](<2 x s16>)
; GFX1250-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX1250-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX1250-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX1250-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX1250-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX1250-NEXT: [[SMAX:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[UV]], [[UV2]]
- ; GFX1250-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC1]]
- ; GFX1250-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[SMAX]](<2 x s16>)
- ; GFX1250-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX1250-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX1250-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX1]](s16)
- ; GFX1250-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST2]](s32), [[LSHR]](s32), [[ANYEXT]](s32)
- ; GFX1250-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
+ ; GFX1250-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[ANYEXT]](s32)
+ ; GFX1250-NEXT: S_NOP 0, implicit [[BUILD_VECTOR2]](<3 x s32>)
%0:_(<3 x s16>) = G_IMPLICIT_DEF
%1:_(<3 x s16>) = G_IMPLICIT_DEF
%2:_(<3 x s16>) = G_SMAX %0, %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir
index 88fe5d0..3eaecf6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir
@@ -474,52 +474,20 @@ body: |
; SI-LABEL: name: test_smin_v3s16
; SI: liveins: $vgpr0, $vgpr1
; SI-NEXT: {{ $}}
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16
- ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST2]], 16
- ; SI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]]
- ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16
- ; SI-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16
- ; SI-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG2]], [[SEXT_INREG3]]
- ; SI-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16
- ; SI-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST3]], 16
- ; SI-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG4]], [[SEXT_INREG5]]
+ ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[C]], [[C]]
+ ; SI-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[C]], [[C]]
+ ; SI-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[C]], [[C]]
; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32), [[SMIN2]](s32)
; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
;
; VI-LABEL: name: test_smin_v3s16
; VI: liveins: $vgpr0, $vgpr1
; VI-NEXT: {{ $}}
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
- ; VI-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC3]]
- ; VI-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[TRUNC4]]
- ; VI-NEXT: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC2]], [[TRUNC5]]
+ ; VI-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; VI-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[DEF]], [[DEF]]
+ ; VI-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[DEF]], [[DEF]]
+ ; VI-NEXT: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[DEF]], [[DEF]]
; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN]](s16)
; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN1]](s16)
; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN2]](s16)
@@ -529,42 +497,32 @@ body: |
; GFX9-LABEL: name: test_smin_v3s16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+ ; GFX9-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[DEF]], [[DEF]]
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[SMIN]](<2 x s16>)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[UV]], [[UV2]]
- ; GFX9-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC1]]
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[SMIN]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN1]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST2]](s32), [[LSHR]](s32), [[ANYEXT]](s32)
- ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[ANYEXT]](s32)
+ ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR2]](<3 x s32>)
;
; GFX1250-LABEL: name: test_smin_v3s16
; GFX1250: liveins: $vgpr0, $vgpr1
; GFX1250-NEXT: {{ $}}
- ; GFX1250-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX1250-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX1250-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX1250-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+ ; GFX1250-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX1250-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX1250-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX1250-NEXT: [[SMIN:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+ ; GFX1250-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[DEF]], [[DEF]]
+ ; GFX1250-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[SMIN]](<2 x s16>)
; GFX1250-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX1250-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX1250-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX1250-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX1250-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX1250-NEXT: [[SMIN:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[UV]], [[UV2]]
- ; GFX1250-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC1]]
- ; GFX1250-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[SMIN]](<2 x s16>)
- ; GFX1250-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX1250-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX1250-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN1]](s16)
- ; GFX1250-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST2]](s32), [[LSHR]](s32), [[ANYEXT]](s32)
- ; GFX1250-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
+ ; GFX1250-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[ANYEXT]](s32)
+ ; GFX1250-NEXT: S_NOP 0, implicit [[BUILD_VECTOR2]](<3 x s32>)
%0:_(<3 x s16>) = G_IMPLICIT_DEF
%1:_(<3 x s16>) = G_IMPLICIT_DEF
%2:_(<3 x s16>) = G_SMIN %0, %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir
index 07d8df2..de50fce 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir
@@ -551,26 +551,18 @@ body: |
; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL4]](s32), [[ASHR4]]
; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL5]]
; GFX6-NEXT: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[SELECT5]], [[C]](s32)
- ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
- ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]]
; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C4]]
; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL6]]
- ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR5]], [[C4]]
- ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]]
- ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
+ ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C]](s32)
; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL7]]
- ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]]
- ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
- ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL8]]
- ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
+ ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C3]], [[SHL7]]
+ ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX8-LABEL: name: sshlsat_v3s16
@@ -612,27 +604,19 @@ body: |
; GFX8-NEXT: [[SELECT4:%[0-9]+]]:_(s16) = G_SELECT [[ICMP4]](s1), [[C1]], [[C2]]
; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[ASHR2]]
; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(s16) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL2]]
- ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
- ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16)
; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT3]](s16)
; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
- ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT5]](s16)
- ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]]
- ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C]](s32)
; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
- ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]]
- ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL5]]
- ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
+ ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL4]]
+ ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX9-LABEL: name: sshlsat_v3s16
@@ -674,17 +658,10 @@ body: |
; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(s16) = G_SELECT [[ICMP4]](s1), [[C1]], [[C2]]
; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[ASHR2]]
; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s16) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL2]]
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
- ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
- ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SELECT1]](s16), [[SELECT3]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SELECT5]](s16), [[TRUNC6]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC7]](s16), [[TRUNC8]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SELECT5]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir
index aa59de0..d28edb0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir
@@ -242,32 +242,25 @@ body: |
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1)
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1)
; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR2]](s1)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C2]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB1]], [[C2]]
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB2]], [[C2]]
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C2]]
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C2]]
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
- ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR4]], [[SHL2]]
- ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>)
+ ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL2]]
+ ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]]
- ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]]
- ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]]
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND5]](s32), [[AND6]](s32), [[AND7]](s32)
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]]
+ ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]]
+ ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]]
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND3]](s32), [[AND4]](s32), [[AND5]](s32)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir
index 67c715f..dc49872 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir
@@ -506,27 +506,20 @@ body: |
; GFX6-NEXT: [[SMIN5:%[0-9]+]]:_(s32) = G_SMIN [[SMAX5]], [[SUB7]]
; GFX6-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SHL4]], [[SMIN5]]
; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SUB8]], [[C]](s32)
- ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
- ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C4]]
; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]]
; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]]
- ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C4]]
- ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]]
- ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
+ ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C5]], [[C]](s32)
; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]]
- ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]]
- ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
- ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL8]]
- ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
+ ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C5]], [[SHL7]]
+ ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX8-LABEL: name: ssubsat_v3s16
@@ -571,27 +564,19 @@ body: |
; GFX8-NEXT: [[SMAX5:%[0-9]+]]:_(s16) = G_SMAX [[SUB6]], [[TRUNC5]]
; GFX8-NEXT: [[SMIN5:%[0-9]+]]:_(s16) = G_SMIN [[SMAX5]], [[SUB7]]
; GFX8-NEXT: [[SUB8:%[0-9]+]]:_(s16) = G_SUB [[TRUNC2]], [[SMIN5]]
- ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
- ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SUB2]](s16)
; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SUB5]](s16)
; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SUB8]](s16)
- ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]]
- ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C]](s32)
; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]]
- ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL2]]
- ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
+ ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C4]], [[SHL1]]
+ ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX9-LABEL: name: ssubsat_v3s16
@@ -600,38 +585,28 @@ body: |
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[DEF]](s16)
- ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[UV]], [[BUILD_VECTOR1]]
- ; GFX9-NEXT: [[SSUBSAT1:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[BUILD_VECTOR]], [[BUILD_VECTOR2]]
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[UV]], [[BUILD_VECTOR]]
+ ; GFX9-NEXT: [[SSUBSAT1:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[UV1]], [[BUILD_VECTOR1]]
; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[SSUBSAT]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
+ ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+ ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[SSUBSAT1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32)
- ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
- ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[TRUNC9]](s16)
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>)
+ ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
index acbcb098..00440ee 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
@@ -6744,93 +6744,120 @@ body: |
; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; SI-NEXT: {{ $}}
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<12 x s16>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
- ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[UV2]](<2 x s16>), [[UV3]](<2 x s16>)
- ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s16>)
- ; SI-NEXT: G_STORE [[BITCAST2]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
- ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
- ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
- ; SI-NEXT: G_STORE [[BITCAST]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1)
- ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
- ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C2]](s64)
- ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
- ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C3]](s64)
- ; SI-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1)
+ ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+ ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL1]]
+ ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL2]]
+ ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL3]]
+ ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
+ ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s16>)
+ ; SI-NEXT: G_STORE [[BITCAST4]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+ ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C2]](s64)
+ ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; SI-NEXT: G_STORE [[DEF]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1)
+ ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C3]](s64)
+ ; SI-NEXT: G_STORE [[DEF]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
+ ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C4]](s64)
+ ; SI-NEXT: G_STORE [[DEF]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1)
;
; CI-LABEL: name: test_store_global_v11s16_align4
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; CI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
- ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<12 x s16>)
- ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
- ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[UV2]](<2 x s16>), [[UV3]](<2 x s16>)
- ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s16>)
- ; CI-NEXT: G_STORE [[BITCAST2]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
- ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
- ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
- ; CI-NEXT: G_STORE [[BITCAST]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1)
- ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
- ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C2]](s64)
- ; CI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
- ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C3]](s64)
- ; CI-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1)
+ ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+ ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL1]]
+ ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL2]]
+ ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL3]]
+ ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
+ ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s16>)
+ ; CI-NEXT: G_STORE [[BITCAST4]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+ ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C2]](s64)
+ ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CI-NEXT: G_STORE [[DEF]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1)
+ ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C3]](s64)
+ ; CI-NEXT: G_STORE [[DEF]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
+ ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C4]](s64)
+ ; CI-NEXT: G_STORE [[DEF]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1)
;
; VI-LABEL: name: test_store_global_v11s16_align4
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<12 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
- ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[UV2]](<2 x s16>), [[UV3]](<2 x s16>)
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s16>)
- ; VI-NEXT: G_STORE [[BITCAST2]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
- ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
- ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
- ; VI-NEXT: G_STORE [[BITCAST]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1)
- ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
- ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C2]](s64)
- ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
- ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C3]](s64)
- ; VI-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1)
+ ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+ ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL1]]
+ ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL2]]
+ ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL3]]
+ ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
+ ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s16>)
+ ; VI-NEXT: G_STORE [[BITCAST4]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+ ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C2]](s64)
+ ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; VI-NEXT: G_STORE [[DEF]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1)
+ ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C3]](s64)
+ ; VI-NEXT: G_STORE [[DEF]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
+ ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C4]](s64)
+ ; VI-NEXT: G_STORE [[DEF]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1)
;
; GFX9-LABEL: name: test_store_global_v11s16_align4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<12 x s16>)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[UV2]](<2 x s16>), [[UV3]](<2 x s16>)
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s16>)
- ; GFX9-NEXT: G_STORE [[BITCAST2]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
- ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
- ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C1]](s64)
- ; GFX9-NEXT: G_STORE [[BITCAST]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1)
- ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
- ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C2]](s64)
- ; GFX9-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
- ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C3]](s64)
- ; GFX9-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s16>)
+ ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
+ ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+ ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[COPY]], [[C]](s64)
+ ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: G_STORE [[DEF1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1)
+ ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C1]](s64)
+ ; GFX9-NEXT: G_STORE [[DEF1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1)
+ ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = nuw inbounds G_PTR_ADD [[PTR_ADD]], [[C2]](s64)
+ ; GFX9-NEXT: G_STORE [[DEF1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<11 x s16>) = G_IMPLICIT_DEF
G_STORE %1, %0 :: (store (<11 x s16>), align 16, addrspace 1)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir
index 7fd2319..ca3abaa 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir
@@ -650,15 +650,14 @@ body: |
; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; SI-NEXT: {{ $}}
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
- ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+ ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C2]]
; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
@@ -675,12 +674,11 @@ body: |
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
- ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+ ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
- ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
+ ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16)
@@ -708,14 +706,13 @@ body: |
; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; SI-NEXT: {{ $}}
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
- ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+ ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C2]]
; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
@@ -726,12 +723,11 @@ body: |
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
- ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+ ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
- ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
+ ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16)
@@ -754,14 +750,13 @@ body: |
; SI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; SI-NEXT: {{ $}}
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
- ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+ ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]]
+ ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C2]]
; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
@@ -772,12 +767,11 @@ body: |
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
- ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+ ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
- ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
+ ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddo.mir
index b2fe9b8..f07c078 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddo.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddo.mir
@@ -199,31 +199,24 @@ body: |
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND2]](s32)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND3]](s32)
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]]
- ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[AND6]](s32)
- ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY4]], [[SHL1]]
- ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32)
- ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]]
- ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]]
- ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]]
- ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]]
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND9]](s32), [[AND10]](s32), [[AND11]](s32)
+ ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]]
+ ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]]
+ ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]]
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND7]](s32), [[AND8]](s32), [[AND9]](s32)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir
index 87ab135..e454798 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir
@@ -387,24 +387,16 @@ body: |
; GFX6-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[XOR2]], [[SHL5]]
; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SHL4]], [[UMIN2]]
; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[ADD2]], [[C]](s32)
- ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
- ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR4]], [[C]](s32)
; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL6]]
- ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C2]]
- ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR5]], [[SHL7]]
- ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]]
- ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR6]], [[SHL8]]
- ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
+ ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL7]]
+ ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX8-LABEL: name: uaddsat_v3s16
@@ -428,27 +420,19 @@ body: |
; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC3]]
; GFX8-NEXT: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC1]], [[TRUNC4]]
; GFX8-NEXT: [[UADDSAT2:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC2]], [[TRUNC5]]
- ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
- ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT]](s16)
; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT1]](s16)
; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT2]](s16)
- ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
- ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
- ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL2]]
- ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
+ ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL1]]
+ ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX9-LABEL: name: uaddsat_v3s16
@@ -457,38 +441,28 @@ body: |
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[DEF]](s16)
- ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[UV]], [[BUILD_VECTOR1]]
- ; GFX9-NEXT: [[UADDSAT1:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[BUILD_VECTOR]], [[BUILD_VECTOR2]]
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[UV]], [[BUILD_VECTOR]]
+ ; GFX9-NEXT: [[UADDSAT1:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[UV1]], [[BUILD_VECTOR1]]
; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UADDSAT]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
+ ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+ ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UADDSAT1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32)
- ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
- ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[TRUNC9]](s16)
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>)
+ ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir
index 32b526e..f89425f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir
@@ -479,51 +479,20 @@ body: |
; SI-LABEL: name: test_umax_v3s16
; SI: liveins: $vgpr0, $vgpr1
; SI-NEXT: {{ $}}
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]]
- ; SI-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[LSHR]], [[LSHR1]]
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
- ; SI-NEXT: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[AND2]], [[AND3]]
+ ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[C]], [[C]]
+ ; SI-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[C]], [[C]]
+ ; SI-NEXT: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[C]], [[C]]
; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32), [[UMAX2]](s32)
; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
;
; VI-LABEL: name: test_umax_v3s16
; VI: liveins: $vgpr0, $vgpr1
; VI-NEXT: {{ $}}
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
- ; VI-NEXT: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC3]]
- ; VI-NEXT: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC1]], [[TRUNC4]]
- ; VI-NEXT: [[UMAX2:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC2]], [[TRUNC5]]
+ ; VI-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; VI-NEXT: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[DEF]], [[DEF]]
+ ; VI-NEXT: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[DEF]], [[DEF]]
+ ; VI-NEXT: [[UMAX2:%[0-9]+]]:_(s16) = G_UMAX [[DEF]], [[DEF]]
; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX]](s16)
; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX1]](s16)
; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX2]](s16)
@@ -533,42 +502,32 @@ body: |
; GFX9-LABEL: name: test_umax_v3s16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+ ; GFX9-NEXT: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[DEF]], [[DEF]]
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UMAX]](<2 x s16>)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[UV]], [[UV2]]
- ; GFX9-NEXT: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC1]]
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UMAX]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX1]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST2]](s32), [[LSHR]](s32), [[ANYEXT]](s32)
- ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[ANYEXT]](s32)
+ ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR2]](<3 x s32>)
;
; GFX1250-LABEL: name: test_umax_v3s16
; GFX1250: liveins: $vgpr0, $vgpr1
; GFX1250-NEXT: {{ $}}
- ; GFX1250-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX1250-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX1250-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX1250-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+ ; GFX1250-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX1250-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX1250-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX1250-NEXT: [[UMAX:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+ ; GFX1250-NEXT: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[DEF]], [[DEF]]
+ ; GFX1250-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UMAX]](<2 x s16>)
; GFX1250-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX1250-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX1250-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX1250-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX1250-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX1250-NEXT: [[UMAX:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[UV]], [[UV2]]
- ; GFX1250-NEXT: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC1]]
- ; GFX1250-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UMAX]](<2 x s16>)
- ; GFX1250-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX1250-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX1250-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX1]](s16)
- ; GFX1250-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST2]](s32), [[LSHR]](s32), [[ANYEXT]](s32)
- ; GFX1250-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
+ ; GFX1250-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[ANYEXT]](s32)
+ ; GFX1250-NEXT: S_NOP 0, implicit [[BUILD_VECTOR2]](<3 x s32>)
%0:_(<3 x s16>) = G_IMPLICIT_DEF
%1:_(<3 x s16>) = G_IMPLICIT_DEF
%2:_(<3 x s16>) = G_UMAX %0, %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir
index 8666c29..e3dd7d4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir
@@ -479,51 +479,20 @@ body: |
; SI-LABEL: name: test_umin_v3s16
; SI: liveins: $vgpr0, $vgpr1
; SI-NEXT: {{ $}}
- ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]]
- ; SI-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[LSHR]], [[LSHR1]]
- ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
- ; SI-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[AND2]], [[AND3]]
+ ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[C]], [[C]]
+ ; SI-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[C]], [[C]]
+ ; SI-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[C]], [[C]]
; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32), [[UMIN2]](s32)
; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
;
; VI-LABEL: name: test_umin_v3s16
; VI: liveins: $vgpr0, $vgpr1
; VI-NEXT: {{ $}}
- ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
- ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
- ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
- ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
- ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
- ; VI-NEXT: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC3]]
- ; VI-NEXT: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC1]], [[TRUNC4]]
- ; VI-NEXT: [[UMIN2:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC2]], [[TRUNC5]]
+ ; VI-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; VI-NEXT: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[DEF]], [[DEF]]
+ ; VI-NEXT: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[DEF]], [[DEF]]
+ ; VI-NEXT: [[UMIN2:%[0-9]+]]:_(s16) = G_UMIN [[DEF]], [[DEF]]
; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN]](s16)
; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN1]](s16)
; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN2]](s16)
@@ -533,42 +502,32 @@ body: |
; GFX9-LABEL: name: test_umin_v3s16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+ ; GFX9-NEXT: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[DEF]], [[DEF]]
+ ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UMIN]](<2 x s16>)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[UV]], [[UV2]]
- ; GFX9-NEXT: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC1]]
- ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UMIN]](<2 x s16>)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN1]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST2]](s32), [[LSHR]](s32), [[ANYEXT]](s32)
- ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[ANYEXT]](s32)
+ ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR2]](<3 x s32>)
;
; GFX1250-LABEL: name: test_umin_v3s16
; GFX1250: liveins: $vgpr0, $vgpr1
; GFX1250-NEXT: {{ $}}
- ; GFX1250-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX1250-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX1250-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX1250-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+ ; GFX1250-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; GFX1250-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX1250-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX1250-NEXT: [[UMIN:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+ ; GFX1250-NEXT: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[DEF]], [[DEF]]
+ ; GFX1250-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UMIN]](<2 x s16>)
; GFX1250-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX1250-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX1250-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX1250-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX1250-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
- ; GFX1250-NEXT: [[UMIN:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[UV]], [[UV2]]
- ; GFX1250-NEXT: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC1]]
- ; GFX1250-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UMIN]](<2 x s16>)
- ; GFX1250-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX1250-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX1250-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN1]](s16)
- ; GFX1250-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST2]](s32), [[LSHR]](s32), [[ANYEXT]](s32)
- ; GFX1250-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>)
+ ; GFX1250-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[ANYEXT]](s32)
+ ; GFX1250-NEXT: S_NOP 0, implicit [[BUILD_VECTOR2]](<3 x s32>)
%0:_(<3 x s16>) = G_IMPLICIT_DEF
%1:_(<3 x s16>) = G_IMPLICIT_DEF
%2:_(<3 x s16>) = G_UMIN %0, %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir
index ee57b72..16ba8c2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir
@@ -101,15 +101,10 @@ name: test_unmerge_s16_v3s16
body: |
bb.0:
; CHECK-LABEL: name: test_unmerge_s16_v3s16
- ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](s32)
- ; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](s32)
- ; CHECK-NEXT: $vgpr2 = COPY [[BITCAST1]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $vgpr1 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $vgpr2 = COPY [[DEF]](s32)
%0:_(<3 x s16>) = G_IMPLICIT_DEF
%1:_(s16), %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
%4:_(s32) = G_ANYEXT %1
@@ -869,10 +864,9 @@ name: test_unmerge_v2s1
body: |
bb.0:
; CHECK-LABEL: name: test_unmerge_v2s1
- ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[UV]](s32)
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[UV1]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[DEF]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[DEF]](s32)
; CHECK-NEXT: S_NOP 0, implicit [[TRUNC]](s1)
; CHECK-NEXT: S_NOP 0, implicit [[TRUNC1]](s1)
%0:_(<2 x s1>) = G_IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir
index c6e3108..7dd17b6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir
@@ -462,23 +462,16 @@ body: |
; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL4]](s32), [[LSHR7]]
; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C1]], [[SHL5]]
; GFX6-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[SELECT2]], [[C]](s32)
- ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX6-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
- ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR6]], [[C]](s32)
; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR4]], [[SHL6]]
- ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C2]]
- ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+ ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C]](s32)
; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR8]], [[SHL7]]
- ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]]
- ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
- ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR9]], [[SHL8]]
- ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
+ ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C3]], [[SHL7]]
+ ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX8-LABEL: name: ushlsat_v3s16
@@ -512,27 +505,19 @@ body: |
; GFX8-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[TRUNC5]](s16)
; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[LSHR5]]
; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[SHL2]]
- ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX8-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
- ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT]](s16)
; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16)
; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]]
- ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT2]](s16)
- ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C2]]
- ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]]
- ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]]
- ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR6]], [[SHL5]]
- ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
+ ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL4]]
+ ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX9-LABEL: name: ushlsat_v3s16
@@ -566,17 +551,10 @@ body: |
; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[TRUNC5]](s16)
; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[LSHR5]]
; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[SHL2]]
- ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
- ; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32)
- ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32)
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SELECT]](s16), [[SELECT1]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SELECT2]](s16), [[TRUNC6]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC7]](s16), [[TRUNC8]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SELECT2]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubo.mir
index 6aff253..75ce080 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubo.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubo.mir
@@ -199,31 +199,24 @@ body: |
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND2]](s32)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND3]](s32)
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]]
- ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[AND6]](s32)
- ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY4]], [[SHL1]]
- ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32)
- ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[SHL2]]
- ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]]
- ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]]
- ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]]
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND9]](s32), [[AND10]](s32), [[AND11]](s32)
+ ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]]
+ ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]]
+ ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]]
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND7]](s32), [[AND8]](s32), [[AND9]](s32)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir
index 4c4cd38..7758594 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir
@@ -371,24 +371,16 @@ body: |
; GFX6-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[SHL4]], [[SHL5]]
; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL4]], [[UMIN2]]
; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SUB2]], [[C]](s32)
- ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
- ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR4]], [[C]](s32)
; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL6]]
- ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
- ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR5]], [[SHL7]]
- ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
- ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR6]], [[SHL8]]
- ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
+ ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL7]]
+ ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX8-LABEL: name: usubsat_v3s16
@@ -412,27 +404,19 @@ body: |
; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC3]]
; GFX8-NEXT: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC1]], [[TRUNC4]]
; GFX8-NEXT: [[USUBSAT2:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC2]], [[TRUNC5]]
- ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
- ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT]](s16)
; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT1]](s16)
; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
- ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT2]](s16)
- ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
- ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+ ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32)
; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]]
- ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
- ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL2]]
- ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
+ ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL1]]
+ ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
;
; GFX9-LABEL: name: usubsat_v3s16
@@ -441,38 +425,28 @@ body: |
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+ ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[DEF]](s16)
- ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[UV]], [[BUILD_VECTOR1]]
- ; GFX9-NEXT: [[USUBSAT1:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[BUILD_VECTOR]], [[BUILD_VECTOR2]]
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[UV]], [[BUILD_VECTOR]]
+ ; GFX9-NEXT: [[USUBSAT1:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[UV1]], [[BUILD_VECTOR1]]
; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[USUBSAT]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
+ ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32)
; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
+ ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[USUBSAT1]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
- ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
- ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32)
- ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
- ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32)
- ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16)
- ; GFX9-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[TRUNC9]](s16)
- ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>)
+ ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16)
+ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>)
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir
index a993afc..a635689 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir
@@ -376,22 +376,17 @@ body: |
bb.0:
; CHECK-LABEL: name: test_xor_v5s32
- ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>)
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32)
- ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<5 x s32>)
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32)
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR]], [[BUILD_VECTOR2]]
; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR1]], [[BUILD_VECTOR3]]
- ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[UV4]], [[UV9]]
- ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](<2 x s32>)
- ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](<2 x s32>)
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF2]](<8 x s32>)
- ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[XOR2]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32)
+ ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](<2 x s32>)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](<2 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[XOR2]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR4]](<8 x s32>)
%0:_(<5 x s32>) = G_IMPLICIT_DEF
%1:_(<5 x s32>) = G_IMPLICIT_DEF
@@ -459,24 +454,12 @@ body: |
; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV3]](<2 x s16>), [[UV4]](<2 x s16>)
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
- ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL1]]
- ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV6]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV6]](<2 x s16>), [[UV7]](<2 x s16>), [[BITCAST]](<2 x s16>)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
@@ -513,63 +496,62 @@ body: |
bb.0:
; CHECK-LABEL: name: test_xor_v5s16
- ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>)
- ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<6 x s16>)
- ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>)
- ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UV3]](<2 x s16>), [[UV4]](<2 x s16>)
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>)
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL2]]
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL3]]
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>)
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
- ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
- ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]]
- ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]]
- ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32)
+ ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL4]]
+ ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
+ ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL5]]
+ ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>)
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
- ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL2]]
- ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
- ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
- ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL3]]
- ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
+ ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL6]]
+ ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
+ ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL7]]
+ ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32)
; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>)
; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<4 x s16>) = G_XOR [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]]
- ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR1]](<4 x s16>)
- ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32)
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>)
- ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>)
- ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST11]], [[C]](s32)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
- ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32)
- ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]]
- ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]]
- ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
- ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL5]]
- ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
- ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR4]], [[C]](s32)
- ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL6]]
- ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32)
- ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>), [[UV13]](<2 x s16>)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR1]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C1]](s32)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]]
+ ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL8]]
+ ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32)
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C2]]
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C2]]
+ ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL9]]
+ ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32)
+ ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL10]]
+ ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32)
+ ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL11]]
+ ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32)
+ ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<8 x s16>)
%0:_(<5 x s16>) = G_IMPLICIT_DEF
%1:_(<5 x s16>) = G_IMPLICIT_DEF
@@ -585,13 +567,10 @@ body: |
bb.0:
; CHECK-LABEL: name: test_xor_v3s8
- ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<4 x s32>)
- ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[UV4]]
- ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV1]], [[UV5]]
- ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[UV2]], [[UV6]]
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[DEF]], [[DEF]]
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[XOR]](s32), [[XOR1]](s32), [[XOR2]](s32)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
%0:_(<3 x s8>) = G_IMPLICIT_DEF
@@ -607,14 +586,11 @@ body: |
bb.0:
; CHECK-LABEL: name: test_xor_v4s8
- ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>)
- ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<4 x s32>)
- ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[UV4]]
- ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV1]], [[UV5]]
- ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[UV2]], [[UV6]]
- ; CHECK-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[UV3]], [[UV7]]
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[DEF]], [[DEF]]
+ ; CHECK-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[DEF]], [[DEF]]
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[XOR]](s32), [[XOR1]](s32), [[XOR2]](s32), [[XOR3]](s32)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
%0:_(<4 x s8>) = G_IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir
index 0b34dff..bce7722 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir
@@ -730,38 +730,31 @@ body: |
; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[UV1]], [[SHL3]]
; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
- ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s48) = G_EXTRACT [[DEF]](s64), 0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[C3]](s64)
- ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s48) = G_EXTRACT [[MV1]](s64), 0
+ ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s48) = G_EXTRACT [[MV1]](s64), 0
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[COPY2]]
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT]](s48)
- ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT1]](s48)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[ANYEXT1]]
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[DEF]], [[ANYEXT]]
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND2]](s64)
; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
- ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32)
; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C1]]
; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32)
; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL4]]
; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C1]]
; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C1]]
- ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
- ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL5]]
- ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C1]]
- ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32)
- ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[LSHR3]], [[SHL6]]
- ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32)
- ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL5]]
- ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV4]](s32), [[OR8]](s32)
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[OR7]](s32)
- ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY3]](s32), [[UV4]](s32)
- ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
- ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL3]]
- ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR9]](s32), [[OR10]](s32)
+ ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL3]]
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[OR6]](s32)
+ ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR6]](s32), [[COPY3]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR6]](s32)
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR6]](s32)
+ ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[OR6]](s32)
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[OR6]](s32)
+ ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[OR6]](s32)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[OR6]](s32)
+ ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s384) = G_MERGE_VALUES [[AND1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64)
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s112) = G_TRUNC [[MV7]](s384)
; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s112)
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
index 9db6d70..6a95881 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
@@ -3154,17 +3154,19 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out, ptr addrspace(
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v2, vcc
; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v3, vcc, v3, v3 row_shr:2 row_mask:0xf bank_mask:0xf bound_ctrl:1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_mov_b32_e32 v2, 0
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v1 row_shr:2 row_mask:0xf bank_mask:0xf
; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v4, vcc
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v3, vcc, v3, v3 row_shr:4 row_mask:0xf bank_mask:0xf bound_ctrl:1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_mov_b32_e32 v4, 0
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v2, v1 row_shr:4 row_mask:0xf bank_mask:0xf
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v2, vcc
; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v2, vcc, v3, v3 row_shr:8 row_mask:0xf bank_mask:0xf bound_ctrl:1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_mov_b32_e32 v3, 0
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v1 row_shr:8 row_mask:0xf bank_mask:0xf
@@ -3172,6 +3174,7 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out, ptr addrspace(
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v4, vcc
; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v2, vcc, v5, v2 quad_perm:[0,1,2,3] row_mask:0xa bank_mask:0xf
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_mov_b32_e32 v5, 0
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_permlanex16_b32 v4, v1, -1, -1
@@ -3184,8 +3187,9 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out, ptr addrspace(
; GFX1164_DPP-NEXT: v_mov_b32_e32 v4, s2
; GFX1164_DPP-NEXT: v_readlane_b32 s2, v2, 31
; GFX1164_DPP-NEXT: v_mov_b32_dpp v3, v4 quad_perm:[0,1,2,3] row_mask:0xc bank_mask:0xf
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v2, vcc, v2, s2 quad_perm:[0,1,2,3] row_mask:0xc bank_mask:0xf
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1]
@@ -3237,8 +3241,9 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out, ptr addrspace(
; GFX1164_DPP-NEXT: v_mov_b32_e32 v8, v4
; GFX1164_DPP-NEXT: v_mov_b32_e32 v9, v5
; GFX1164_DPP-NEXT: v_readfirstlane_b32 s3, v7
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX1164_DPP-NEXT: v_add_co_u32 v6, vcc, s2, v8
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: s_mov_b32 s2, s6
; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v7, null, s3, v9, vcc
; GFX1164_DPP-NEXT: s_mov_b32 s3, 0x31016000
@@ -5856,6 +5861,7 @@ define amdgpu_kernel void @sub_i64_constant(ptr addrspace(1) %out, ptr addrspace
; GFX1164-NEXT: buffer_gl1_inv
; GFX1164-NEXT: buffer_gl0_inv
; GFX1164-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[7:8]
+; GFX1164-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164-NEXT: s_or_b64 s[10:11], vcc, s[10:11]
; GFX1164-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[10:11]
@@ -6384,6 +6390,7 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
; GFX1164-NEXT: buffer_gl1_inv
; GFX1164-NEXT: buffer_gl0_inv
; GFX1164-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[7:8]
+; GFX1164-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164-NEXT: s_or_b64 s[12:13], vcc, s[12:13]
; GFX1164-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[12:13]
@@ -6983,6 +6990,7 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out, ptr addrspace(
; GFX1164_ITERATIVE-NEXT: buffer_gl1_inv
; GFX1164_ITERATIVE-NEXT: buffer_gl0_inv
; GFX1164_ITERATIVE-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
+; GFX1164_ITERATIVE-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_ITERATIVE-NEXT: s_or_b64 s[12:13], vcc, s[12:13]
; GFX1164_ITERATIVE-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164_ITERATIVE-NEXT: s_and_not1_b64 exec, exec, s[12:13]
@@ -7669,17 +7677,19 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out, ptr addrspace(
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v2, vcc
; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v3, vcc, v3, v3 row_shr:2 row_mask:0xf bank_mask:0xf bound_ctrl:1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_mov_b32_e32 v2, 0
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v1 row_shr:2 row_mask:0xf bank_mask:0xf
; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v4, vcc
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v3, vcc, v3, v3 row_shr:4 row_mask:0xf bank_mask:0xf bound_ctrl:1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_mov_b32_e32 v4, 0
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v2, v1 row_shr:4 row_mask:0xf bank_mask:0xf
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v2, vcc
; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v2, vcc, v3, v3 row_shr:8 row_mask:0xf bank_mask:0xf bound_ctrl:1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_mov_b32_e32 v3, 0
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v1 row_shr:8 row_mask:0xf bank_mask:0xf
@@ -7687,6 +7697,7 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out, ptr addrspace(
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v4, vcc
; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v2, vcc, v5, v2 quad_perm:[0,1,2,3] row_mask:0xa bank_mask:0xf
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_mov_b32_e32 v5, 0
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_permlanex16_b32 v4, v1, -1, -1
@@ -7699,8 +7710,9 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out, ptr addrspace(
; GFX1164_DPP-NEXT: v_mov_b32_e32 v4, s2
; GFX1164_DPP-NEXT: v_readlane_b32 s2, v2, 31
; GFX1164_DPP-NEXT: v_mov_b32_dpp v3, v4 quad_perm:[0,1,2,3] row_mask:0xc bank_mask:0xf
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v2, vcc, v2, s2 quad_perm:[0,1,2,3] row_mask:0xc bank_mask:0xf
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1]
@@ -7748,8 +7760,9 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out, ptr addrspace(
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1164_DPP-NEXT: v_mov_b32_e32 v11, v7
; GFX1164_DPP-NEXT: v_mov_b32_e32 v10, v6
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX1164_DPP-NEXT: v_sub_co_u32 v8, vcc, v10, s8
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_subrev_co_ci_u32_e64 v9, null, s9, v11, vcc
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1164_DPP-NEXT: v_mov_b32_e32 v6, v8
@@ -7761,6 +7774,7 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out, ptr addrspace(
; GFX1164_DPP-NEXT: buffer_gl1_inv
; GFX1164_DPP-NEXT: buffer_gl0_inv
; GFX1164_DPP-NEXT: v_cmp_eq_u64_e32 vcc, v[6:7], v[10:11]
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: s_or_b64 s[12:13], vcc, s[12:13]
; GFX1164_DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164_DPP-NEXT: s_and_not1_b64 exec, exec, s[12:13]
@@ -7775,8 +7789,9 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out, ptr addrspace(
; GFX1164_DPP-NEXT: v_mov_b32_e32 v8, v4
; GFX1164_DPP-NEXT: v_mov_b32_e32 v9, v5
; GFX1164_DPP-NEXT: v_readfirstlane_b32 s3, v7
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX1164_DPP-NEXT: v_sub_co_u32 v6, vcc, s2, v8
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: s_mov_b32 s2, -1
; GFX1164_DPP-NEXT: v_sub_co_ci_u32_e64 v7, null, s3, v9, vcc
; GFX1164_DPP-NEXT: s_mov_b32 s3, 0x31016000
@@ -12770,6 +12785,7 @@ define amdgpu_kernel void @uniform_fadd_bf16(ptr addrspace(1) %result, ptr addrs
; GFX1164-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[2:3], off, s[4:7], 0 glc
; GFX1164-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX1164-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1
+; GFX1164-TRUE16-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164-TRUE16-NEXT: v_mov_b32_e32 v1, v2
; GFX1164-TRUE16-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
; GFX1164-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
@@ -12826,6 +12842,7 @@ define amdgpu_kernel void @uniform_fadd_bf16(ptr addrspace(1) %result, ptr addrs
; GFX1164-FAKE16-NEXT: buffer_atomic_cmpswap_b32 v[2:3], off, s[4:7], 0 glc
; GFX1164-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GFX1164-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1
+; GFX1164-FAKE16-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164-FAKE16-NEXT: v_mov_b32_e32 v1, v2
; GFX1164-FAKE16-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
; GFX1164-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
@@ -13804,9 +13821,10 @@ define amdgpu_kernel void @uniform_fadd_v2bf16(ptr addrspace(1) %result, ptr add
; GFX1164-TRUE16-NEXT: v_or_b32_e32 v6, 0x400000, v2
; GFX1164-TRUE16-NEXT: v_add3_u32 v3, v3, v0, 0x7fff
; GFX1164-TRUE16-NEXT: v_add3_u32 v4, v4, v2, 0x7fff
-; GFX1164-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX1164-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX1164-TRUE16-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
; GFX1164-TRUE16-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
+; GFX1164-TRUE16-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164-TRUE16-NEXT: v_cndmask_b32_e32 v0, v4, v6, vcc
; GFX1164-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1164-TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.h
@@ -13815,6 +13833,7 @@ define amdgpu_kernel void @uniform_fadd_v2bf16(ptr addrspace(1) %result, ptr add
; GFX1164-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[2:3], off, s[4:7], 0 glc
; GFX1164-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX1164-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1
+; GFX1164-TRUE16-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164-TRUE16-NEXT: v_mov_b32_e32 v1, v2
; GFX1164-TRUE16-NEXT: s_or_b64 s[8:9], vcc, s[8:9]
; GFX1164-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
@@ -13863,6 +13882,7 @@ define amdgpu_kernel void @uniform_fadd_v2bf16(ptr addrspace(1) %result, ptr add
; GFX1164-FAKE16-NEXT: v_add3_u32 v3, v3, v0, 0x7fff
; GFX1164-FAKE16-NEXT: v_add3_u32 v4, v4, v2, 0x7fff
; GFX1164-FAKE16-NEXT: v_cmp_u_f32_e64 s[0:1], v0, v0
+; GFX1164-FAKE16-NEXT: s_waitcnt_depctr 0xf1ff
; GFX1164-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1164-FAKE16-NEXT: v_cndmask_b32_e32 v2, v4, v6, vcc
; GFX1164-FAKE16-NEXT: v_cndmask_b32_e64 v0, v3, v5, s[0:1]
@@ -13873,6 +13893,7 @@ define amdgpu_kernel void @uniform_fadd_v2bf16(ptr addrspace(1) %result, ptr add
; GFX1164-FAKE16-NEXT: buffer_atomic_cmpswap_b32 v[2:3], off, s[4:7], 0 glc
; GFX1164-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GFX1164-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc, v2, v1
+; GFX1164-FAKE16-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164-FAKE16-NEXT: v_mov_b32_e32 v1, v2
; GFX1164-FAKE16-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
; GFX1164-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
index 6167a84..08a4f0c 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
@@ -957,6 +957,7 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX1164_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164_DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164_DPP-NEXT: v_readlane_b32 s2, v1, 47
; GFX1164_DPP-NEXT: v_readlane_b32 s6, v1, 63
; GFX1164_DPP-NEXT: v_writelane_b32 v3, s3, 32
@@ -2640,17 +2641,19 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v2, vcc
; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v3, vcc, v3, v3 row_shr:2 row_mask:0xf bank_mask:0xf bound_ctrl:1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_mov_b32_e32 v2, 0
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v1 row_shr:2 row_mask:0xf bank_mask:0xf
; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v4, vcc
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v3, vcc, v3, v3 row_shr:4 row_mask:0xf bank_mask:0xf bound_ctrl:1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_mov_b32_e32 v4, 0
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v2, v1 row_shr:4 row_mask:0xf bank_mask:0xf
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v2, vcc
; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v2, vcc, v3, v3 row_shr:8 row_mask:0xf bank_mask:0xf bound_ctrl:1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_mov_b32_e32 v3, 0
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v1 row_shr:8 row_mask:0xf bank_mask:0xf
@@ -2658,6 +2661,7 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v4, vcc
; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v2, vcc, v5, v2 quad_perm:[0,1,2,3] row_mask:0xa bank_mask:0xf
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_mov_b32_e32 v5, 0
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_permlanex16_b32 v4, v1, -1, -1
@@ -2670,13 +2674,15 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: v_mov_b32_e32 v4, s2
; GFX1164_DPP-NEXT: v_readlane_b32 s2, v2, 31
; GFX1164_DPP-NEXT: v_mov_b32_dpp v3, v4 quad_perm:[0,1,2,3] row_mask:0xc bank_mask:0xf
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v2, vcc, v2, s2 quad_perm:[0,1,2,3] row_mask:0xc bank_mask:0xf
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX1164_DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164_DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164_DPP-NEXT: v_mov_b32_dpp v5, v2 row_shr:1 row_mask:0xf bank_mask:0xf
; GFX1164_DPP-NEXT: v_readlane_b32 s2, v2, 15
; GFX1164_DPP-NEXT: v_mov_b32_dpp v6, v1 row_shr:1 row_mask:0xf bank_mask:0xf
@@ -2701,6 +2707,7 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: v_writelane_b32 v6, s9, 48
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[6:7]
; GFX1164_DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: s_mov_b32 s2, -1
; GFX1164_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
; GFX1164_DPP-NEXT: s_and_saveexec_b64 s[6:7], vcc
@@ -3288,23 +3295,26 @@ define amdgpu_kernel void @add_i64_varying_nouse() {
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v2, vcc
; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v3, vcc, v3, v3 row_xmask:2 row_mask:0xf bank_mask:0xf bound_ctrl:1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_mov_b32_e32 v2, 0
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v1 row_xmask:2 row_mask:0xf bank_mask:0xf
; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v4, vcc
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v3, vcc, v3, v3 row_xmask:4 row_mask:0xf bank_mask:0xf bound_ctrl:1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_mov_b32_e32 v4, 0
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v2, v1 row_xmask:4 row_mask:0xf bank_mask:0xf
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v2, vcc
; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v2, vcc, v3, v3 row_xmask:8 row_mask:0xf bank_mask:0xf bound_ctrl:1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v1 row_xmask:8 row_mask:0xf bank_mask:0xf
; GFX1164_DPP-NEXT: v_permlanex16_b32 v3, v2, 0, 0
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v4, vcc
; GFX1164_DPP-NEXT: v_add_co_u32 v2, vcc, v2, v3
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1164_DPP-NEXT: v_permlanex16_b32 v4, v1, 0, 0
; GFX1164_DPP-NEXT: v_permlane64_b32 v3, v2
@@ -3312,10 +3322,11 @@ define amdgpu_kernel void @add_i64_varying_nouse() {
; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v4, vcc
; GFX1164_DPP-NEXT: v_permlane64_b32 v4, v1
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1164_DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1164_DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164_DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
; GFX1164_DPP-NEXT: v_add_co_u32 v2, vcc, v2, v3
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffc
; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v3, null, v1, v4, vcc
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX1164_DPP-NEXT: v_mov_b32_e32 v5, v2
@@ -4337,6 +4348,7 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX1164_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164_DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164_DPP-NEXT: v_readlane_b32 s2, v1, 47
; GFX1164_DPP-NEXT: v_readlane_b32 s6, v1, 63
; GFX1164_DPP-NEXT: v_writelane_b32 v3, s3, 32
@@ -6043,17 +6055,19 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v2, vcc
; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v3, vcc, v3, v3 row_shr:2 row_mask:0xf bank_mask:0xf bound_ctrl:1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_mov_b32_e32 v2, 0
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v1 row_shr:2 row_mask:0xf bank_mask:0xf
; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v4, vcc
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v3, vcc, v3, v3 row_shr:4 row_mask:0xf bank_mask:0xf bound_ctrl:1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_mov_b32_e32 v4, 0
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v2, v1 row_shr:4 row_mask:0xf bank_mask:0xf
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v2, vcc
; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v2, vcc, v3, v3 row_shr:8 row_mask:0xf bank_mask:0xf bound_ctrl:1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_mov_b32_e32 v3, 0
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v1 row_shr:8 row_mask:0xf bank_mask:0xf
@@ -6061,6 +6075,7 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v4, vcc
; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v2, vcc, v5, v2 quad_perm:[0,1,2,3] row_mask:0xa bank_mask:0xf
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_mov_b32_e32 v5, 0
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_permlanex16_b32 v4, v1, -1, -1
@@ -6073,13 +6088,15 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: v_mov_b32_e32 v4, s2
; GFX1164_DPP-NEXT: v_readlane_b32 s2, v2, 31
; GFX1164_DPP-NEXT: v_mov_b32_dpp v3, v4 quad_perm:[0,1,2,3] row_mask:0xc bank_mask:0xf
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1164_DPP-NEXT: v_add_co_u32_e64_dpp v2, vcc, v2, s2 quad_perm:[0,1,2,3] row_mask:0xc bank_mask:0xf
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX1164_DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164_DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164_DPP-NEXT: v_mov_b32_dpp v5, v2 row_shr:1 row_mask:0xf bank_mask:0xf
; GFX1164_DPP-NEXT: v_readlane_b32 s2, v2, 15
; GFX1164_DPP-NEXT: v_mov_b32_dpp v6, v1 row_shr:1 row_mask:0xf bank_mask:0xf
@@ -6104,6 +6121,7 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: v_writelane_b32 v6, s9, 48
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[6:7]
; GFX1164_DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: s_mov_b32 s2, -1
; GFX1164_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
; GFX1164_DPP-NEXT: s_and_saveexec_b64 s[6:7], vcc
@@ -6724,6 +6742,7 @@ define amdgpu_kernel void @and_i32_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX1164_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164_DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164_DPP-NEXT: v_readlane_b32 s2, v1, 47
; GFX1164_DPP-NEXT: v_readlane_b32 s6, v1, 63
; GFX1164_DPP-NEXT: v_writelane_b32 v3, s3, 32
@@ -7441,6 +7460,7 @@ define amdgpu_kernel void @and_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX1164_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164_DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164_DPP-NEXT: v_mov_b32_dpp v5, v2 row_shr:1 row_mask:0xf bank_mask:0xf
; GFX1164_DPP-NEXT: v_readlane_b32 s3, v2, 15
; GFX1164_DPP-NEXT: v_readlane_b32 s6, v1, 31
@@ -8071,6 +8091,7 @@ define amdgpu_kernel void @or_i32_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX1164_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164_DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164_DPP-NEXT: v_readlane_b32 s2, v1, 47
; GFX1164_DPP-NEXT: v_readlane_b32 s6, v1, 63
; GFX1164_DPP-NEXT: v_writelane_b32 v3, s3, 32
@@ -8787,6 +8808,7 @@ define amdgpu_kernel void @or_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX1164_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164_DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164_DPP-NEXT: v_mov_b32_dpp v5, v2 row_shr:1 row_mask:0xf bank_mask:0xf
; GFX1164_DPP-NEXT: v_readlane_b32 s3, v2, 15
; GFX1164_DPP-NEXT: v_readlane_b32 s6, v1, 31
@@ -9417,6 +9439,7 @@ define amdgpu_kernel void @xor_i32_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX1164_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164_DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164_DPP-NEXT: v_readlane_b32 s2, v1, 47
; GFX1164_DPP-NEXT: v_readlane_b32 s6, v1, 63
; GFX1164_DPP-NEXT: v_writelane_b32 v3, s3, 32
@@ -10133,6 +10156,7 @@ define amdgpu_kernel void @xor_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX1164_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164_DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164_DPP-NEXT: v_mov_b32_dpp v5, v2 row_shr:1 row_mask:0xf bank_mask:0xf
; GFX1164_DPP-NEXT: v_readlane_b32 s3, v2, 15
; GFX1164_DPP-NEXT: v_readlane_b32 s6, v1, 31
@@ -10763,6 +10787,7 @@ define amdgpu_kernel void @max_i32_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX1164_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164_DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164_DPP-NEXT: v_readlane_b32 s2, v1, 47
; GFX1164_DPP-NEXT: v_readlane_b32 s6, v1, 63
; GFX1164_DPP-NEXT: v_writelane_b32 v3, s3, 32
@@ -11041,6 +11066,7 @@ define amdgpu_kernel void @max_i64_constant(ptr addrspace(1) %out) {
; GFX1164-NEXT: v_cndmask_b32_e64 v0, 5, 0, vcc
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164-NEXT: v_cmp_gt_i64_e32 vcc, s[2:3], v[0:1]
+; GFX1164-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc
; GFX1164-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc
; GFX1164-NEXT: s_mov_b32 s3, 0x31016000
@@ -11870,16 +11896,19 @@ define amdgpu_kernel void @max_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v6, v2 row_shr:2 row_mask:0xf bank_mask:0xf
; GFX1164_DPP-NEXT: v_mov_b32_dpp v5, v1 row_shr:2 row_mask:0xf bank_mask:0xf
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_4)
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_cmp_gt_i64_e32 vcc, v[1:2], v[5:6]
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX1164_DPP-NEXT: v_bfrev_b32_e32 v6, 1
; GFX1164_DPP-NEXT: v_mov_b32_e32 v5, 0
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v2 row_shr:4 row_mask:0xf bank_mask:0xf
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v3, v1 row_shr:4 row_mask:0xf bank_mask:0xf
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_cmp_gt_i64_e32 vcc, v[1:2], v[3:4]
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX1164_DPP-NEXT: v_bfrev_b32_e32 v4, 1
@@ -11887,8 +11916,9 @@ define amdgpu_kernel void @max_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v6, v2 row_shr:8 row_mask:0xf bank_mask:0xf
; GFX1164_DPP-NEXT: v_mov_b32_dpp v5, v1 row_shr:8 row_mask:0xf bank_mask:0xf
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1164_DPP-NEXT: v_cmp_gt_i64_e32 vcc, v[1:2], v[5:6]
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX1164_DPP-NEXT: v_permlanex16_b32 v5, v2, -1, -1
@@ -11898,6 +11928,7 @@ define amdgpu_kernel void @max_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v3, v6 quad_perm:[0,1,2,3] row_mask:0xa bank_mask:0xf
; GFX1164_DPP-NEXT: v_cmp_gt_i64_e32 vcc, v[1:2], v[3:4]
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX1164_DPP-NEXT: v_bfrev_b32_e32 v4, 1
@@ -11912,14 +11943,17 @@ define amdgpu_kernel void @max_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v5 quad_perm:[0,1,2,3] row_mask:0xc bank_mask:0xf
; GFX1164_DPP-NEXT: v_mov_b32_dpp v3, v6 quad_perm:[0,1,2,3] row_mask:0xc bank_mask:0xf
; GFX1164_DPP-NEXT: v_bfrev_b32_e32 v5, 1
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1164_DPP-NEXT: v_cmp_gt_i64_e32 vcc, v[1:2], v[3:4]
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; GFX1164_DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1]
+; GFX1164_DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164_DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164_DPP-NEXT: v_mov_b32_dpp v5, v2 row_shr:1 row_mask:0xf bank_mask:0xf
; GFX1164_DPP-NEXT: v_readlane_b32 s2, v2, 15
; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v1 row_shr:1 row_mask:0xf bank_mask:0xf
@@ -11944,6 +11978,7 @@ define amdgpu_kernel void @max_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: v_writelane_b32 v4, s9, 48
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[6:7]
; GFX1164_DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: s_mov_b32 s2, -1
; GFX1164_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
; GFX1164_DPP-NEXT: s_and_saveexec_b64 s[6:7], vcc
@@ -12577,6 +12612,7 @@ define amdgpu_kernel void @min_i32_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX1164_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164_DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164_DPP-NEXT: v_readlane_b32 s2, v1, 47
; GFX1164_DPP-NEXT: v_readlane_b32 s6, v1, 63
; GFX1164_DPP-NEXT: v_writelane_b32 v3, s3, 32
@@ -12855,6 +12891,7 @@ define amdgpu_kernel void @min_i64_constant(ptr addrspace(1) %out) {
; GFX1164-NEXT: v_cndmask_b32_e64 v0, 5, -1, vcc
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164-NEXT: v_cmp_lt_i64_e32 vcc, s[2:3], v[0:1]
+; GFX1164-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc
; GFX1164-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc
; GFX1164-NEXT: s_mov_b32 s3, 0x31016000
@@ -13684,16 +13721,19 @@ define amdgpu_kernel void @min_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v6, v2 row_shr:2 row_mask:0xf bank_mask:0xf
; GFX1164_DPP-NEXT: v_mov_b32_dpp v5, v1 row_shr:2 row_mask:0xf bank_mask:0xf
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_4)
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_cmp_lt_i64_e32 vcc, v[1:2], v[5:6]
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX1164_DPP-NEXT: v_bfrev_b32_e32 v6, -2
; GFX1164_DPP-NEXT: v_mov_b32_e32 v5, -1
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v2 row_shr:4 row_mask:0xf bank_mask:0xf
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v3, v1 row_shr:4 row_mask:0xf bank_mask:0xf
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_cmp_lt_i64_e32 vcc, v[1:2], v[3:4]
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX1164_DPP-NEXT: v_bfrev_b32_e32 v4, -2
@@ -13701,8 +13741,9 @@ define amdgpu_kernel void @min_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v6, v2 row_shr:8 row_mask:0xf bank_mask:0xf
; GFX1164_DPP-NEXT: v_mov_b32_dpp v5, v1 row_shr:8 row_mask:0xf bank_mask:0xf
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1164_DPP-NEXT: v_cmp_lt_i64_e32 vcc, v[1:2], v[5:6]
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX1164_DPP-NEXT: v_permlanex16_b32 v5, v2, -1, -1
@@ -13712,6 +13753,7 @@ define amdgpu_kernel void @min_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v3, v6 quad_perm:[0,1,2,3] row_mask:0xa bank_mask:0xf
; GFX1164_DPP-NEXT: v_cmp_lt_i64_e32 vcc, v[1:2], v[3:4]
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX1164_DPP-NEXT: v_bfrev_b32_e32 v4, -2
@@ -13726,14 +13768,17 @@ define amdgpu_kernel void @min_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v5 quad_perm:[0,1,2,3] row_mask:0xc bank_mask:0xf
; GFX1164_DPP-NEXT: v_mov_b32_dpp v3, v6 quad_perm:[0,1,2,3] row_mask:0xc bank_mask:0xf
; GFX1164_DPP-NEXT: v_bfrev_b32_e32 v5, -2
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1164_DPP-NEXT: v_cmp_lt_i64_e32 vcc, v[1:2], v[3:4]
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; GFX1164_DPP-NEXT: v_mov_b32_e32 v4, -1
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1]
+; GFX1164_DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164_DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164_DPP-NEXT: v_mov_b32_dpp v5, v2 row_shr:1 row_mask:0xf bank_mask:0xf
; GFX1164_DPP-NEXT: v_readlane_b32 s2, v2, 15
; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v1 row_shr:1 row_mask:0xf bank_mask:0xf
@@ -13758,6 +13803,7 @@ define amdgpu_kernel void @min_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: v_writelane_b32 v4, s9, 48
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[6:7]
; GFX1164_DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: s_mov_b32 s2, -1
; GFX1164_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
; GFX1164_DPP-NEXT: s_and_saveexec_b64 s[6:7], vcc
@@ -14391,6 +14437,7 @@ define amdgpu_kernel void @umax_i32_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX1164_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164_DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164_DPP-NEXT: v_readlane_b32 s2, v1, 47
; GFX1164_DPP-NEXT: v_readlane_b32 s6, v1, 63
; GFX1164_DPP-NEXT: v_writelane_b32 v3, s3, 32
@@ -14665,6 +14712,7 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) {
; GFX1164-NEXT: v_mov_b32_e32 v1, 0
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164-NEXT: v_cmp_gt_u64_e32 vcc, s[2:3], v[0:1]
+; GFX1164-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc
; GFX1164-NEXT: v_cndmask_b32_e64 v1, 0, s3, vcc
; GFX1164-NEXT: s_mov_b32 s3, 0x31016000
@@ -15489,16 +15537,19 @@ define amdgpu_kernel void @umax_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v6, v2 row_shr:2 row_mask:0xf bank_mask:0xf
; GFX1164_DPP-NEXT: v_mov_b32_dpp v5, v1 row_shr:2 row_mask:0xf bank_mask:0xf
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_4)
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_cmp_gt_u64_e32 vcc, v[1:2], v[5:6]
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX1164_DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1164_DPP-NEXT: v_mov_b32_e32 v5, 0
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v2 row_shr:4 row_mask:0xf bank_mask:0xf
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v3, v1 row_shr:4 row_mask:0xf bank_mask:0xf
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_cmp_gt_u64_e32 vcc, v[1:2], v[3:4]
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX1164_DPP-NEXT: v_mov_b32_e32 v4, 0
@@ -15506,8 +15557,9 @@ define amdgpu_kernel void @umax_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v6, v2 row_shr:8 row_mask:0xf bank_mask:0xf
; GFX1164_DPP-NEXT: v_mov_b32_dpp v5, v1 row_shr:8 row_mask:0xf bank_mask:0xf
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1164_DPP-NEXT: v_cmp_gt_u64_e32 vcc, v[1:2], v[5:6]
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX1164_DPP-NEXT: v_permlanex16_b32 v5, v2, -1, -1
@@ -15517,6 +15569,7 @@ define amdgpu_kernel void @umax_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v3, v6 quad_perm:[0,1,2,3] row_mask:0xa bank_mask:0xf
; GFX1164_DPP-NEXT: v_cmp_gt_u64_e32 vcc, v[1:2], v[3:4]
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX1164_DPP-NEXT: v_mov_b32_e32 v4, 0
@@ -15531,14 +15584,17 @@ define amdgpu_kernel void @umax_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v5 quad_perm:[0,1,2,3] row_mask:0xc bank_mask:0xf
; GFX1164_DPP-NEXT: v_mov_b32_dpp v3, v6 quad_perm:[0,1,2,3] row_mask:0xc bank_mask:0xf
; GFX1164_DPP-NEXT: v_mov_b32_e32 v5, 0
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1164_DPP-NEXT: v_cmp_gt_u64_e32 vcc, v[1:2], v[3:4]
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; GFX1164_DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1]
+; GFX1164_DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164_DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164_DPP-NEXT: v_mov_b32_dpp v5, v2 row_shr:1 row_mask:0xf bank_mask:0xf
; GFX1164_DPP-NEXT: v_readlane_b32 s2, v2, 15
; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v1 row_shr:1 row_mask:0xf bank_mask:0xf
@@ -15563,6 +15619,7 @@ define amdgpu_kernel void @umax_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: v_writelane_b32 v4, s9, 48
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[6:7]
; GFX1164_DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: s_mov_b32 s2, -1
; GFX1164_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
; GFX1164_DPP-NEXT: s_and_saveexec_b64 s[6:7], vcc
@@ -16190,6 +16247,7 @@ define amdgpu_kernel void @umin_i32_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX1164_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164_DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164_DPP-NEXT: v_readlane_b32 s2, v1, 47
; GFX1164_DPP-NEXT: v_readlane_b32 s6, v1, 63
; GFX1164_DPP-NEXT: v_writelane_b32 v3, s3, 32
@@ -16465,6 +16523,7 @@ define amdgpu_kernel void @umin_i64_constant(ptr addrspace(1) %out) {
; GFX1164-NEXT: v_cndmask_b32_e64 v0, 5, -1, vcc
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164-NEXT: v_cmp_lt_u64_e32 vcc, s[2:3], v[0:1]
+; GFX1164-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164-NEXT: v_cndmask_b32_e64 v1, v1, s3, vcc
; GFX1164-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc
; GFX1164-NEXT: s_mov_b32 s3, 0x31016000
@@ -17287,16 +17346,19 @@ define amdgpu_kernel void @umin_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v6, v2 row_shr:2 row_mask:0xf bank_mask:0xf
; GFX1164_DPP-NEXT: v_mov_b32_dpp v5, v1 row_shr:2 row_mask:0xf bank_mask:0xf
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_4)
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_cmp_lt_u64_e32 vcc, v[1:2], v[5:6]
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX1164_DPP-NEXT: v_mov_b32_e32 v6, -1
; GFX1164_DPP-NEXT: v_mov_b32_e32 v5, -1
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v2 row_shr:4 row_mask:0xf bank_mask:0xf
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v3, v1 row_shr:4 row_mask:0xf bank_mask:0xf
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_cmp_lt_u64_e32 vcc, v[1:2], v[3:4]
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX1164_DPP-NEXT: v_mov_b32_e32 v4, -1
@@ -17304,8 +17366,9 @@ define amdgpu_kernel void @umin_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v6, v2 row_shr:8 row_mask:0xf bank_mask:0xf
; GFX1164_DPP-NEXT: v_mov_b32_dpp v5, v1 row_shr:8 row_mask:0xf bank_mask:0xf
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1164_DPP-NEXT: v_cmp_lt_u64_e32 vcc, v[1:2], v[5:6]
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX1164_DPP-NEXT: v_permlanex16_b32 v5, v2, -1, -1
@@ -17315,6 +17378,7 @@ define amdgpu_kernel void @umin_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164_DPP-NEXT: v_mov_b32_dpp v3, v6 quad_perm:[0,1,2,3] row_mask:0xa bank_mask:0xf
; GFX1164_DPP-NEXT: v_cmp_lt_u64_e32 vcc, v[1:2], v[3:4]
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX1164_DPP-NEXT: v_mov_b32_e32 v4, -1
@@ -17329,14 +17393,17 @@ define amdgpu_kernel void @umin_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v5 quad_perm:[0,1,2,3] row_mask:0xc bank_mask:0xf
; GFX1164_DPP-NEXT: v_mov_b32_dpp v3, v6 quad_perm:[0,1,2,3] row_mask:0xc bank_mask:0xf
; GFX1164_DPP-NEXT: v_mov_b32_e32 v5, -1
-; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX1164_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1164_DPP-NEXT: v_cmp_lt_u64_e32 vcc, v[1:2], v[3:4]
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; GFX1164_DPP-NEXT: v_mov_b32_e32 v4, -1
; GFX1164_DPP-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[0:1]
+; GFX1164_DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164_DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164_DPP-NEXT: v_mov_b32_dpp v5, v2 row_shr:1 row_mask:0xf bank_mask:0xf
; GFX1164_DPP-NEXT: v_readlane_b32 s2, v2, 15
; GFX1164_DPP-NEXT: v_mov_b32_dpp v4, v1 row_shr:1 row_mask:0xf bank_mask:0xf
@@ -17361,6 +17428,7 @@ define amdgpu_kernel void @umin_i64_varying(ptr addrspace(1) %out) {
; GFX1164_DPP-NEXT: v_writelane_b32 v4, s9, 48
; GFX1164_DPP-NEXT: s_mov_b64 exec, s[6:7]
; GFX1164_DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
+; GFX1164_DPP-NEXT: s_waitcnt_depctr 0xfffd
; GFX1164_DPP-NEXT: s_mov_b32 s2, -1
; GFX1164_DPP-NEXT: ; implicit-def: $vgpr7_vgpr8
; GFX1164_DPP-NEXT: s_and_saveexec_b64 s[6:7], vcc
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll
index 0f59304..62f09de 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll
@@ -515,6 +515,7 @@ define amdgpu_ps void @add_i32_varying(ptr addrspace(8) inreg %out, ptr addrspac
; GFX1164-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164-NEXT: s_or_saveexec_b64 s[10:11], -1
+; GFX1164-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164-NEXT: v_readlane_b32 s12, v1, 63
; GFX1164-NEXT: v_readlane_b32 s14, v1, 47
; GFX1164-NEXT: v_writelane_b32 v3, s13, 32
diff --git a/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll b/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
index 5959f76..f6149cf 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
@@ -1387,7 +1387,7 @@ define amdgpu_kernel void @long_branch_hang(ptr addrspace(1) nocapture %arg, i32
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 2
-; GFX11-NEXT: s_waitcnt_depctr 0xfffe
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_add_u32 s0, s2, s0
; GFX11-NEXT: s_addc_u32 s1, s3, s1
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/global-saddr-atomics-min-max-system.ll b/llvm/test/CodeGen/AMDGPU/global-saddr-atomics-min-max-system.ll
index b7ee9f7..65832f8 100644
--- a/llvm/test/CodeGen/AMDGPU/global-saddr-atomics-min-max-system.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-saddr-atomics-min-max-system.ll
@@ -519,6 +519,7 @@ define amdgpu_ps <2 x float> @global_max_saddr_i64_rtn(ptr addrspace(1) inreg %s
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[9:10]
+; GFX11-NEXT: s_waitcnt_depctr 0xfffd
; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_and_not1_b64 exec, exec, s[0:1]
@@ -649,6 +650,7 @@ define amdgpu_ps <2 x float> @global_max_saddr_i64_rtn_neg128(ptr addrspace(1) i
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[9:10]
+; GFX11-NEXT: s_waitcnt_depctr 0xfffd
; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_and_not1_b64 exec, exec, s[0:1]
@@ -771,6 +773,7 @@ define amdgpu_ps void @global_max_saddr_i64_nortn(ptr addrspace(1) inreg %sbase,
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[5:6]
+; GFX11-NEXT: s_waitcnt_depctr 0xfffd
; GFX11-NEXT: v_mov_b32_e32 v6, v4
; GFX11-NEXT: v_mov_b32_e32 v5, v3
; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -885,6 +888,7 @@ define amdgpu_ps void @global_max_saddr_i64_nortn_neg128(ptr addrspace(1) inreg
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[5:6]
+; GFX11-NEXT: s_waitcnt_depctr 0xfffd
; GFX11-NEXT: v_mov_b32_e32 v6, v4
; GFX11-NEXT: v_mov_b32_e32 v5, v3
; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -1443,6 +1447,7 @@ define amdgpu_ps <2 x float> @global_min_saddr_i64_rtn(ptr addrspace(1) inreg %s
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[9:10]
+; GFX11-NEXT: s_waitcnt_depctr 0xfffd
; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_and_not1_b64 exec, exec, s[0:1]
@@ -1573,6 +1578,7 @@ define amdgpu_ps <2 x float> @global_min_saddr_i64_rtn_neg128(ptr addrspace(1) i
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[9:10]
+; GFX11-NEXT: s_waitcnt_depctr 0xfffd
; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_and_not1_b64 exec, exec, s[0:1]
@@ -1695,6 +1701,7 @@ define amdgpu_ps void @global_min_saddr_i64_nortn(ptr addrspace(1) inreg %sbase,
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[5:6]
+; GFX11-NEXT: s_waitcnt_depctr 0xfffd
; GFX11-NEXT: v_mov_b32_e32 v6, v4
; GFX11-NEXT: v_mov_b32_e32 v5, v3
; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -1809,6 +1816,7 @@ define amdgpu_ps void @global_min_saddr_i64_nortn_neg128(ptr addrspace(1) inreg
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[5:6]
+; GFX11-NEXT: s_waitcnt_depctr 0xfffd
; GFX11-NEXT: v_mov_b32_e32 v6, v4
; GFX11-NEXT: v_mov_b32_e32 v5, v3
; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -2367,6 +2375,7 @@ define amdgpu_ps <2 x float> @global_umax_saddr_i64_rtn(ptr addrspace(1) inreg %
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[9:10]
+; GFX11-NEXT: s_waitcnt_depctr 0xfffd
; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_and_not1_b64 exec, exec, s[0:1]
@@ -2497,6 +2506,7 @@ define amdgpu_ps <2 x float> @global_umax_saddr_i64_rtn_neg128(ptr addrspace(1)
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[9:10]
+; GFX11-NEXT: s_waitcnt_depctr 0xfffd
; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_and_not1_b64 exec, exec, s[0:1]
@@ -2619,6 +2629,7 @@ define amdgpu_ps void @global_umax_saddr_i64_nortn(ptr addrspace(1) inreg %sbase
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[5:6]
+; GFX11-NEXT: s_waitcnt_depctr 0xfffd
; GFX11-NEXT: v_mov_b32_e32 v6, v4
; GFX11-NEXT: v_mov_b32_e32 v5, v3
; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -2733,6 +2744,7 @@ define amdgpu_ps void @global_umax_saddr_i64_nortn_neg128(ptr addrspace(1) inreg
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[5:6]
+; GFX11-NEXT: s_waitcnt_depctr 0xfffd
; GFX11-NEXT: v_mov_b32_e32 v6, v4
; GFX11-NEXT: v_mov_b32_e32 v5, v3
; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -3291,6 +3303,7 @@ define amdgpu_ps <2 x float> @global_umin_saddr_i64_rtn(ptr addrspace(1) inreg %
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[9:10]
+; GFX11-NEXT: s_waitcnt_depctr 0xfffd
; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_and_not1_b64 exec, exec, s[0:1]
@@ -3421,6 +3434,7 @@ define amdgpu_ps <2 x float> @global_umin_saddr_i64_rtn_neg128(ptr addrspace(1)
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[9:10]
+; GFX11-NEXT: s_waitcnt_depctr 0xfffd
; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_and_not1_b64 exec, exec, s[0:1]
@@ -3543,6 +3557,7 @@ define amdgpu_ps void @global_umin_saddr_i64_nortn(ptr addrspace(1) inreg %sbase
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[5:6]
+; GFX11-NEXT: s_waitcnt_depctr 0xfffd
; GFX11-NEXT: v_mov_b32_e32 v6, v4
; GFX11-NEXT: v_mov_b32_e32 v5, v3
; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -3657,6 +3672,7 @@ define amdgpu_ps void @global_umin_saddr_i64_nortn_neg128(ptr addrspace(1) inreg
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc, v[3:4], v[5:6]
+; GFX11-NEXT: s_waitcnt_depctr 0xfffd
; GFX11-NEXT: v_mov_b32_e32 v6, v4
; GFX11-NEXT: v_mov_b32_e32 v5, v3
; GFX11-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll
index 4581efc..ff4b658 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll
@@ -963,14 +963,14 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1164-DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164-DPP-NEXT: v_max_f32_e32 v1, v1, v2
; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
+; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v4, exec_hi, v0
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, v1
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v4
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB1_2
; GFX1164-DPP-NEXT: ; %bb.1:
@@ -1996,14 +1996,14 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1164-DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164-DPP-NEXT: v_max_f32_e32 v1, v1, v2
; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
+; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v4, exec_hi, v0
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, v1
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v4
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB3_2
; GFX1164-DPP-NEXT: ; %bb.1:
@@ -3029,14 +3029,14 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1164-DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164-DPP-NEXT: v_max_f32_e32 v1, v1, v2
; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
+; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v4, exec_hi, v0
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, v1
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v4
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB5_2
; GFX1164-DPP-NEXT: ; %bb.1:
@@ -4248,10 +4248,11 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
; GFX1164-DPP-NEXT: v_max_f64 v[2:3], v[2:3], v[4:5]
; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, 0
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164-DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v6, exec_hi, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, v2
@@ -5511,10 +5512,11 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
; GFX1164-DPP-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
; GFX1164-DPP-NEXT: v_max_f64 v[2:3], v[2:3], v[4:5]
; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, 0
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164-DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v6, exec_hi, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, v2
@@ -6774,10 +6776,11 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1164-DPP-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
; GFX1164-DPP-NEXT: v_max_f64 v[2:3], v[2:3], v[4:5]
; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, 0
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164-DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v6, exec_hi, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, v2
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll
index bd570d9..15d4b9a 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll
@@ -963,14 +963,14 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1164-DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164-DPP-NEXT: v_min_f32_e32 v1, v1, v2
; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
+; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v4, exec_hi, v0
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, v1
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v4
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB1_2
; GFX1164-DPP-NEXT: ; %bb.1:
@@ -1996,14 +1996,14 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1164-DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164-DPP-NEXT: v_min_f32_e32 v1, v1, v2
; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
+; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v4, exec_hi, v0
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, v1
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v4
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB3_2
; GFX1164-DPP-NEXT: ; %bb.1:
@@ -3029,14 +3029,14 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1164-DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164-DPP-NEXT: v_min_f32_e32 v1, v1, v2
; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
+; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v4, exec_hi, v0
-; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, v1
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v4
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB5_2
; GFX1164-DPP-NEXT: ; %bb.1:
@@ -4248,10 +4248,11 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
; GFX1164-DPP-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, 0
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164-DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v6, exec_hi, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, v2
@@ -5511,10 +5512,11 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
; GFX1164-DPP-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
; GFX1164-DPP-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, 0
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164-DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v6, exec_hi, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, v2
@@ -6774,10 +6776,11 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1164-DPP-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
; GFX1164-DPP-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX1164-DPP-NEXT: s_mov_b64 exec, s[0:1]
-; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX1164-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v10, 0
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164-DPP-NEXT: s_waitcnt_depctr 0xfffe
; GFX1164-DPP-NEXT: v_mbcnt_hi_u32_b32 v6, exec_hi, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, v2
diff --git a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.dead.mir b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.dead.mir
index ec940f8..2f6e986 100644
--- a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.dead.mir
+++ b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.dead.mir
@@ -8,16 +8,14 @@ body: |
; CHECK-LABEL: name: test_struct
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<3 x s32>)
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32)
- ; CHECK-NEXT: $vgpr1 = COPY [[UV]](s32)
- ; CHECK-NEXT: $vgpr2 = COPY [[UV1]](s32)
- ; CHECK-NEXT: $vgpr3 = COPY [[UV2]](s32)
- ; CHECK-NEXT: $vgpr4_vgpr5 = COPY [[DEF2]](s64)
- ; CHECK-NEXT: $vgpr6 = COPY [[DEF3]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr1 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $vgpr2 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $vgpr3 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $vgpr4_vgpr5 = COPY [[DEF1]](s64)
+ ; CHECK-NEXT: $vgpr6 = COPY [[DEF2]](<2 x s16>)
; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
%0:_(s32), %1:_(<3 x s32>), %2:_(s64), %3:_(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.dead)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll
index 0a2e7af..8103140 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll
@@ -1185,6 +1185,7 @@ define amdgpu_ps void @fcmp_x2(float %a) #0 {
; GFX11-NEXT: v_cmp_lt_f32_e32 vcc, 0x3e800000, v0
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc
; GFX11-NEXT: v_cmp_nle_f32_e32 vcc, 0, v0
+; GFX11-NEXT: s_waitcnt_depctr 0xfffd
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT: s_cbranch_scc0 .LBB21_1
; GFX11-NEXT: s_endpgm
@@ -1595,6 +1596,7 @@ define amdgpu_ps void @kill_with_loop_exit(float inreg %inp0, float inreg %inp1,
; GFX11-NEXT: s_mov_b64 s[2:3], exec
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
; GFX11-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v1
+; GFX11-NEXT: s_waitcnt_depctr 0xf1ff
; GFX11-NEXT: .LBB26_2: ; %bb
; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX11-NEXT: v_add_f32_e32 v0, 0x3e800000, v0
diff --git a/llvm/test/CodeGen/AMDGPU/nor-divergent-lanemask.ll b/llvm/test/CodeGen/AMDGPU/nor-divergent-lanemask.ll
index 4205393..97c2bdc 100644
--- a/llvm/test/CodeGen/AMDGPU/nor-divergent-lanemask.ll
+++ b/llvm/test/CodeGen/AMDGPU/nor-divergent-lanemask.ll
@@ -13,6 +13,7 @@ define amdgpu_ps i64 @test_nor(i64 inreg %a, i64 inreg %b) {
; SDAG-W64-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; SDAG-W64-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; SDAG-W64-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v0
+; SDAG-W64-NEXT: s_waitcnt_depctr 0xf1ff
; SDAG-W64-NEXT: ; return to shader part epilog
;
; GISEL-W64-LABEL: test_nor:
@@ -57,8 +58,8 @@ define amdgpu_ps i64 @test_or_two_uses(i64 inreg %a, i64 inreg %b) {
; SDAG-W64-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; SDAG-W64-NEXT: s_delay_alu instid0(VALU_DEP_2)
; SDAG-W64-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v1
+; SDAG-W64-NEXT: s_waitcnt_depctr 0xf1ff
; SDAG-W64-NEXT: s_and_b64 s[0:1], s[0:1], vcc
-; SDAG-W64-NEXT: s_waitcnt_depctr 0xfffe
; SDAG-W64-NEXT: ; return to shader part epilog
;
; GISEL-W64-LABEL: test_or_two_uses:
diff --git a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
index b21c781..5461532 100644
--- a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
+++ b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
@@ -1239,6 +1239,7 @@ define amdgpu_ps void @phi_use_def_before_kill(float inreg %x, i32 inreg %y) #0
; GFX11-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc
; GFX11-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v1
+; GFX11-NEXT: s_waitcnt_depctr 0xfffd
; GFX11-NEXT: s_and_not1_b64 exec, exec, vcc
; GFX11-NEXT: s_cbranch_scc0 .LBB11_6
; GFX11-NEXT: ; %bb.1: ; %bb
@@ -2066,6 +2067,7 @@ define amdgpu_ps void @scc_use_after_kill_inst(float inreg %x, i32 inreg %y) #0
; GFX11-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, -1.0, vcc
; GFX11-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v1
+; GFX11-NEXT: s_waitcnt_depctr 0xfffd
; GFX11-NEXT: s_and_not1_b64 s[2:3], s[2:3], vcc
; GFX11-NEXT: s_cbranch_scc0 .LBB17_6
; GFX11-NEXT: ; %bb.1: ; %bb
diff --git a/llvm/test/CodeGen/AMDGPU/valu-mask-write-hazard.mir b/llvm/test/CodeGen/AMDGPU/valu-mask-write-hazard.mir
index 1eabe62..e1d3ebc 100644
--- a/llvm/test/CodeGen/AMDGPU/valu-mask-write-hazard.mir
+++ b/llvm/test/CodeGen/AMDGPU/valu-mask-write-hazard.mir
@@ -38,6 +38,22 @@
define amdgpu_gs void @mask_hazard_no_hazard1() { ret void }
define amdgpu_gs void @mask_hazard_no_hazard2() { ret void }
define amdgpu_gs void @mask_hazard_no_hazard3() { ret void }
+ define amdgpu_gs void @mask_hazard_cancel_hazard1() { ret void }
+ define amdgpu_gs void @mask_hazard_cancel_hazard2() { ret void }
+ define amdgpu_gs void @mask_hazard_cancel_hazard3() { ret void }
+ define amdgpu_gs void @mask_hazard_cancel_hazard4() { ret void }
+ define amdgpu_gs void @mask_hazard_partial_cancel1() { ret void }
+ define amdgpu_gs void @mask_hazard_partial_cancel2() { ret void }
+ define amdgpu_gs void @mask_hazard_partial_cancel3() { ret void }
+ define amdgpu_gs void @mask_hazard_partial_cancel4() { ret void }
+ define amdgpu_gs void @mask_hazard_valu_readlane1() { ret void }
+ define amdgpu_gs void @mask_hazard_valu_readlane2() { ret void }
+ define amdgpu_gs void @mask_hazard_valu_readlane3() { ret void }
+ define amdgpu_gs void @mask_hazard_valu_readfirstlane() { ret void }
+ define amdgpu_gs void @mask_hazard_valu_vcmp_vcc() { ret void }
+ define amdgpu_gs void @mask_hazard_valu_vcmp_sgpr() { ret void }
+ define amdgpu_gs void @mask_hazard_combine1() { ret void }
+ define amdgpu_gs void @mask_hazard_combine2() { ret void }
...
---
@@ -487,8 +503,8 @@ body: |
; GFX11-LABEL: name: mask_hazard_subreg3
; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
; GFX11-NEXT: $sgpr2 = S_MOV_B32 0
- ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534
; GFX11-NEXT: $sgpr3 = S_MOV_B32 0
+ ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534
; GFX11-NEXT: S_ENDPGM 0
;
; GFX12-LABEL: name: mask_hazard_subreg3
@@ -655,3 +671,373 @@ body: |
$vgpr2 = V_MOV_B32_e32 0, implicit $exec
S_ENDPGM 0
...
+
+---
+name: mask_hazard_cancel_hazard1
+body: |
+ bb.0:
+ ; GCN-LABEL: name: mask_hazard_cancel_hazard1
+ ; GCN: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
+ ; GCN-NEXT: $vcc_lo = S_MOV_B32 0
+ ; GCN-NEXT: $vcc_hi = S_MOV_B32 0
+ ; GCN-NEXT: S_WAITCNT_DEPCTR 65534
+ ; GCN-NEXT: $sgpr0 = S_MOV_B32 $vcc_lo
+ ; GCN-NEXT: $vcc = S_MOV_B64 1
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
+ $vcc_lo = S_MOV_B32 0
+ $vcc_hi = S_MOV_B32 0
+ $sgpr0 = S_MOV_B32 $vcc_lo
+ $vcc = S_MOV_B64 1
+ S_ENDPGM 0
+...
+
+---
+name: mask_hazard_cancel_hazard2
+body: |
+ bb.0:
+ ; GCN-LABEL: name: mask_hazard_cancel_hazard2
+ ; GCN: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
+ ; GCN-NEXT: $vcc = S_MOV_B64 0
+ ; GCN-NEXT: S_WAITCNT_DEPCTR 65534
+ ; GCN-NEXT: $sgpr0 = S_MOV_B32 $vcc_lo
+ ; GCN-NEXT: $vcc = S_MOV_B64 1
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
+ $vcc = S_MOV_B64 0
+ $sgpr0 = S_MOV_B32 $vcc_lo
+ $vcc = S_MOV_B64 1
+ S_ENDPGM 0
+...
+
+---
+name: mask_hazard_cancel_hazard3
+body: |
+ bb.0:
+ ; GCN-LABEL: name: mask_hazard_cancel_hazard3
+ ; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
+ ; GCN-NEXT: $sgpr0_sgpr1 = S_MOV_B64 0
+ ; GCN-NEXT: S_WAITCNT_DEPCTR 65534
+ ; GCN-NEXT: $sgpr4 = S_MOV_B32 $sgpr0
+ ; GCN-NEXT: $sgpr0_sgpr1 = S_MOV_B64 1
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
+ $sgpr0_sgpr1 = S_MOV_B64 0
+ $sgpr4 = S_MOV_B32 $sgpr0
+ $sgpr0_sgpr1 = S_MOV_B64 1
+ S_ENDPGM 0
+...
+
+---
+name: mask_hazard_cancel_hazard4
+body: |
+ bb.0:
+ ; GCN-LABEL: name: mask_hazard_cancel_hazard4
+ ; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
+ ; GCN-NEXT: $sgpr0 = S_MOV_B32 0
+ ; GCN-NEXT: $sgpr1 = S_MOV_B32 0
+ ; GCN-NEXT: S_WAITCNT_DEPCTR 65534
+ ; GCN-NEXT: $sgpr4 = S_MOV_B32 $sgpr0
+ ; GCN-NEXT: $sgpr0_sgpr1 = S_MOV_B64 1
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
+ $sgpr0 = S_MOV_B32 0
+ $sgpr1 = S_MOV_B32 0
+ $sgpr4 = S_MOV_B32 $sgpr0
+ $sgpr0_sgpr1 = S_MOV_B64 1
+ S_ENDPGM 0
+...
+
+---
+name: mask_hazard_partial_cancel1
+body: |
+ bb.0:
+ ; GFX11-LABEL: name: mask_hazard_partial_cancel1
+ ; GFX11: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
+ ; GFX11-NEXT: $vcc_lo = S_MOV_B32 0
+ ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534
+ ; GFX11-NEXT: $sgpr0 = S_MOV_B32 $vcc_lo
+ ; GFX11-NEXT: $vcc = S_MOV_B64 1
+ ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534
+ ; GFX11-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: mask_hazard_partial_cancel1
+ ; GFX12: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
+ ; GFX12-NEXT: $vcc_lo = S_MOV_B32 0
+ ; GFX12-NEXT: S_WAITCNT_DEPCTR 65534
+ ; GFX12-NEXT: $sgpr0 = S_MOV_B32 $vcc_lo
+ ; GFX12-NEXT: $vcc = S_MOV_B64 1
+ ; GFX12-NEXT: S_ENDPGM 0
+ $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
+ $vcc_lo = S_MOV_B32 0
+ $sgpr0 = S_MOV_B32 $vcc_lo
+ $vcc = S_MOV_B64 1
+ S_ENDPGM 0
+...
+
+---
+name: mask_hazard_partial_cancel2
+body: |
+ bb.0:
+ ; GFX11-LABEL: name: mask_hazard_partial_cancel2
+ ; GFX11: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
+ ; GFX11-NEXT: $vcc_hi = S_MOV_B32 0
+ ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534
+ ; GFX11-NEXT: $sgpr0 = S_MOV_B32 $vcc_lo
+ ; GFX11-NEXT: $vcc = S_MOV_B64 1
+ ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534
+ ; GFX11-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: mask_hazard_partial_cancel2
+ ; GFX12: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
+ ; GFX12-NEXT: $vcc_hi = S_MOV_B32 0
+ ; GFX12-NEXT: S_WAITCNT_DEPCTR 65534
+ ; GFX12-NEXT: $sgpr0 = S_MOV_B32 $vcc_lo
+ ; GFX12-NEXT: $vcc = S_MOV_B64 1
+ ; GFX12-NEXT: S_ENDPGM 0
+ $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
+ $vcc_hi = S_MOV_B32 0
+ $sgpr0 = S_MOV_B32 $vcc_lo
+ $vcc = S_MOV_B64 1
+ S_ENDPGM 0
+...
+
+---
+name: mask_hazard_partial_cancel3
+body: |
+ bb.0:
+ ; GFX11-LABEL: name: mask_hazard_partial_cancel3
+ ; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
+ ; GFX11-NEXT: $sgpr0 = S_MOV_B32 0
+ ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534
+ ; GFX11-NEXT: $sgpr3 = S_MOV_B32 $sgpr0
+ ; GFX11-NEXT: $sgpr0_sgpr1 = S_MOV_B64 1
+ ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534
+ ; GFX11-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: mask_hazard_partial_cancel3
+ ; GFX12: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
+ ; GFX12-NEXT: $sgpr0 = S_MOV_B32 0
+ ; GFX12-NEXT: S_WAITCNT_DEPCTR 65534
+ ; GFX12-NEXT: $sgpr3 = S_MOV_B32 $sgpr0
+ ; GFX12-NEXT: $sgpr0_sgpr1 = S_MOV_B64 1
+ ; GFX12-NEXT: S_ENDPGM 0
+ $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
+ $sgpr0 = S_MOV_B32 0
+ $sgpr3 = S_MOV_B32 $sgpr0
+ $sgpr0_sgpr1 = S_MOV_B64 1
+ S_ENDPGM 0
+...
+
+---
+name: mask_hazard_partial_cancel4
+body: |
+ bb.0:
+ ; GFX11-LABEL: name: mask_hazard_partial_cancel4
+ ; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
+ ; GFX11-NEXT: $sgpr1 = S_MOV_B32 0
+ ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534
+ ; GFX11-NEXT: $sgpr3 = S_MOV_B32 $sgpr1
+ ; GFX11-NEXT: $sgpr0_sgpr1 = S_MOV_B64 1
+ ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534
+ ; GFX11-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: mask_hazard_partial_cancel4
+ ; GFX12: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
+ ; GFX12-NEXT: $sgpr1 = S_MOV_B32 0
+ ; GFX12-NEXT: S_WAITCNT_DEPCTR 65534
+ ; GFX12-NEXT: $sgpr3 = S_MOV_B32 $sgpr1
+ ; GFX12-NEXT: $sgpr0_sgpr1 = S_MOV_B64 1
+ ; GFX12-NEXT: S_ENDPGM 0
+ $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
+ $sgpr1 = S_MOV_B32 0
+ $sgpr3 = S_MOV_B32 $sgpr1
+ $sgpr0_sgpr1 = S_MOV_B64 1
+ S_ENDPGM 0
+...
+
+---
+name: mask_hazard_valu_readlane1
+body: |
+ bb.0:
+ ; GFX11-LABEL: name: mask_hazard_valu_readlane1
+ ; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
+ ; GFX11-NEXT: $sgpr2 = V_READLANE_B32 $vgpr3, 0
+ ; GFX11-NEXT: S_WAITCNT_DEPCTR 61951
+ ; GFX11-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: mask_hazard_valu_readlane1
+ ; GFX12: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
+ ; GFX12-NEXT: $sgpr2 = V_READLANE_B32 $vgpr3, 0
+ ; GFX12-NEXT: S_ENDPGM 0
+ $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
+ $sgpr2 = V_READLANE_B32 $vgpr3, 0
+ S_ENDPGM 0
+...
+
+---
+name: mask_hazard_valu_readlane2
+body: |
+ bb.0:
+ ; GFX11-LABEL: name: mask_hazard_valu_readlane2
+ ; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
+ ; GFX11-NEXT: $sgpr3 = V_READLANE_B32 $vgpr3, 1
+ ; GFX11-NEXT: S_WAITCNT_DEPCTR 61951
+ ; GFX11-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: mask_hazard_valu_readlane2
+ ; GFX12: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
+ ; GFX12-NEXT: $sgpr3 = V_READLANE_B32 $vgpr3, 1
+ ; GFX12-NEXT: S_ENDPGM 0
+ $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
+ $sgpr3 = V_READLANE_B32 $vgpr3, 1
+ S_ENDPGM 0
+...
+
+---
+name: mask_hazard_valu_readlane3
+body: |
+ bb.0:
+ ; GFX11-LABEL: name: mask_hazard_valu_readlane3
+ ; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
+ ; GFX11-NEXT: $sgpr2 = V_READLANE_B32 $vgpr3, 0
+ ; GFX11-NEXT: $sgpr3 = V_READLANE_B32 $vgpr3, 1
+ ; GFX11-NEXT: S_WAITCNT_DEPCTR 61951
+ ; GFX11-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: mask_hazard_valu_readlane3
+ ; GFX12: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
+ ; GFX12-NEXT: $sgpr2 = V_READLANE_B32 $vgpr3, 0
+ ; GFX12-NEXT: $sgpr3 = V_READLANE_B32 $vgpr3, 1
+ ; GFX12-NEXT: S_ENDPGM 0
+ $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
+ $sgpr2 = V_READLANE_B32 $vgpr3, 0
+ $sgpr3 = V_READLANE_B32 $vgpr3, 1
+ S_ENDPGM 0
+...
+
+---
+name: mask_hazard_valu_readfirstlane
+body: |
+ bb.0:
+ ; GFX11-LABEL: name: mask_hazard_valu_readfirstlane
+ ; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
+ ; GFX11-NEXT: $sgpr2 = V_READFIRSTLANE_B32 $vgpr3, implicit $exec
+ ; GFX11-NEXT: S_WAITCNT_DEPCTR 61951
+ ; GFX11-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: mask_hazard_valu_readfirstlane
+ ; GFX12: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
+ ; GFX12-NEXT: $sgpr2 = V_READFIRSTLANE_B32 $vgpr3, implicit $exec
+ ; GFX12-NEXT: S_ENDPGM 0
+ $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
+ $sgpr2 = V_READFIRSTLANE_B32 $vgpr3, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: mask_hazard_valu_vcmp_vcc
+body: |
+ bb.0:
+ ; GFX11-LABEL: name: mask_hazard_valu_vcmp_vcc
+ ; GFX11: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
+ ; GFX11-NEXT: V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec
+ ; GFX11-NEXT: S_WAITCNT_DEPCTR 65533
+ ; GFX11-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: mask_hazard_valu_vcmp_vcc
+ ; GFX12: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
+ ; GFX12-NEXT: V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec
+ ; GFX12-NEXT: S_ENDPGM 0
+ $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
+ V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: mask_hazard_valu_vcmp_sgpr
+body: |
+ bb.0:
+ ; GFX11-LABEL: name: mask_hazard_valu_vcmp_sgpr
+ ; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
+ ; GFX11-NEXT: $sgpr2_sgpr3 = V_CMP_EQ_U32_e64 3, $vgpr5, implicit $exec
+ ; GFX11-NEXT: S_WAITCNT_DEPCTR 61951
+ ; GFX11-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: mask_hazard_valu_vcmp_sgpr
+ ; GFX12: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
+ ; GFX12-NEXT: $sgpr2_sgpr3 = V_CMP_EQ_U32_e64 3, $vgpr5, implicit $exec
+ ; GFX12-NEXT: S_ENDPGM 0
+ $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
+ $sgpr2_sgpr3 = V_CMP_EQ_U32_e64 3, $vgpr5, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: mask_hazard_combine1
+body: |
+ bb.0:
+ ; GFX11-LABEL: name: mask_hazard_combine1
+ ; GFX11: $vgpr3 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
+ ; GFX11-NEXT: $vgpr4 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
+ ; GFX11-NEXT: $vgpr5 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
+ ; GFX11-NEXT: V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec
+ ; GFX11-NEXT: $sgpr0 = S_MOV_B32 0
+ ; GFX11-NEXT: $sgpr1 = S_MOV_B32 0
+ ; GFX11-NEXT: $sgpr2_sgpr3 = V_CMP_EQ_U32_e64 3, $vgpr5, implicit $exec
+ ; GFX11-NEXT: S_WAITCNT_DEPCTR 61948
+ ; GFX11-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: mask_hazard_combine1
+ ; GFX12: $vgpr3 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
+ ; GFX12-NEXT: $vgpr4 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
+ ; GFX12-NEXT: $vgpr5 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
+ ; GFX12-NEXT: V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec
+ ; GFX12-NEXT: $sgpr0 = S_MOV_B32 0
+ ; GFX12-NEXT: $sgpr1 = S_MOV_B32 0
+ ; GFX12-NEXT: $sgpr2_sgpr3 = V_CMP_EQ_U32_e64 3, $vgpr5, implicit $exec
+ ; GFX12-NEXT: S_ENDPGM 0
+ $vgpr3 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
+ $vgpr4 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
+ $vgpr5 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
+ V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec
+ $sgpr0 = S_MOV_B32 0
+ $sgpr1 = S_MOV_B32 0
+ $sgpr2_sgpr3 = V_CMP_EQ_U32_e64 3, $vgpr5, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: mask_hazard_combine2
+body: |
+ bb.0:
+ ; GFX11-LABEL: name: mask_hazard_combine2
+ ; GFX11: $vgpr3 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
+ ; GFX11-NEXT: $vgpr4 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
+ ; GFX11-NEXT: $vgpr5 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
+ ; GFX11-NEXT: V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec
+ ; GFX11-NEXT: $sgpr0 = S_MOV_B32 0
+ ; GFX11-NEXT: S_WAITCNT_DEPCTR 65532
+ ; GFX11-NEXT: $sgpr1 = S_MOV_B32 $sgpr4
+ ; GFX11-NEXT: $sgpr2_sgpr3 = V_CMP_EQ_U32_e64 3, $vgpr5, implicit $exec
+ ; GFX11-NEXT: S_WAITCNT_DEPCTR 61950
+ ; GFX11-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: mask_hazard_combine2
+ ; GFX12: $vgpr3 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
+ ; GFX12-NEXT: $vgpr4 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
+ ; GFX12-NEXT: $vgpr5 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
+ ; GFX12-NEXT: V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec
+ ; GFX12-NEXT: $sgpr0 = S_MOV_B32 0
+ ; GFX12-NEXT: $sgpr1 = S_MOV_B32 $sgpr4
+ ; GFX12-NEXT: $sgpr2_sgpr3 = V_CMP_EQ_U32_e64 3, $vgpr5, implicit $exec
+ ; GFX12-NEXT: S_ENDPGM 0
+ $vgpr3 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
+ $vgpr4 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
+ $vgpr5 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec
+ V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec
+ $sgpr0 = S_MOV_B32 0
+ $sgpr1 = S_MOV_B32 $sgpr4
+ $sgpr2_sgpr3 = V_CMP_EQ_U32_e64 3, $vgpr5, implicit $exec
+ S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/Hexagon/inst_masked_store_bug1.ll b/llvm/test/CodeGen/Hexagon/inst_masked_store_bug1.ll
new file mode 100644
index 0000000..fcf1246
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/inst_masked_store_bug1.ll
@@ -0,0 +1,94 @@
+;; REQUIRES: asserts
+;; RUN: llc --mtriple=hexagon -mattr=+hvxv79,+hvx-length128b %s -o - | FileCheck %s
+;; Sanity check for lowering masked scatter without assertion errors.
+
+define void @outer_product(ptr %aptr, ptr %bptr, ptr %cptr, i32 %T, i32 %W) {
+entry:
+ %W.ripple.bcast.splatinsert = insertelement <8 x i32> poison, i32 %W, i64 0
+ %W.ripple.bcast.splat = shufflevector <8 x i32> %W.ripple.bcast.splatinsert, <8 x i32> poison, <8 x i32> zeroinitializer
+ %div1194 = lshr i32 %T, 3
+ %cmp84.not = icmp ult i32 %T, 8
+ br i1 %cmp84.not, label %for.end49, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ %div10195 = lshr i32 %W, 3
+ %cmp1782.not = icmp ult i32 %W, 8
+ %arrayidx27.ripple.LS.dim.slope = mul <8 x i32> %W.ripple.bcast.splat, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %arrayidx27.ripple.LS.dim.slope.ripple.bcast = shufflevector <8 x i32> %arrayidx27.ripple.LS.dim.slope, <8 x i32> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+ %arrayidx27.ripple.LS.slope = add <64 x i32> %arrayidx27.ripple.LS.dim.slope.ripple.bcast, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %invariant.gep196 = getelementptr i8, ptr %cptr, <64 x i32> %arrayidx27.ripple.LS.slope
+ br label %for.body
+
+for.body: ; preds = %for.end, %for.body.preheader
+ %ripple.par.iv.085 = phi i32 [ %add48, %for.end ], [ 0, %for.body.preheader ]
+ %mul2 = shl i32 %ripple.par.iv.085, 3
+ br i1 %cmp1782.not, label %for.end, label %for.body18.lr.ph
+
+for.body18.lr.ph: ; preds = %for.body
+ %arrayidx = getelementptr inbounds nuw i8, ptr %aptr, i32 %mul2
+ %mul25 = mul i32 %mul2, %W
+ %gep197 = getelementptr i8, <64 x ptr> %invariant.gep196, i32 %mul25
+ br label %for.body18
+
+for.body18: ; preds = %for.body18, %for.body18.lr.ph
+ %ripple.par.iv15.083 = phi i32 [ 0, %for.body18.lr.ph ], [ %add28, %for.body18 ]
+ %mul19 = shl i32 %ripple.par.iv15.083, 3
+ %.ripple.LS.instance184 = load <8 x i8>, ptr %arrayidx, align 1
+ %.ripple.LS.instance184.ripple.bcast = shufflevector <8 x i8> %.ripple.LS.instance184, <8 x i8> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+ %arrayidx21 = getelementptr inbounds nuw i8, ptr %bptr, i32 %mul19
+ %.ripple.LS.instance = load <8 x i8>, ptr %arrayidx21, align 1
+ %.ripple.LS.instance.ripple.bcast = shufflevector <8 x i8> %.ripple.LS.instance, <8 x i8> poison, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %mul23.ripple.LS.instance = mul <64 x i8> %.ripple.LS.instance.ripple.bcast, %.ripple.LS.instance184.ripple.bcast
+ %gep = getelementptr i8, <64 x ptr> %gep197, i32 %mul19
+ tail call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> %mul23.ripple.LS.instance, <64 x ptr> %gep, i32 1, <64 x i1> splat (i1 true))
+ %add28 = add nuw i32 %ripple.par.iv15.083, 1
+ %cmp17 = icmp ult i32 %add28, %div10195
+ br i1 %cmp17, label %for.body18, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body18
+ %0 = shl i32 %add28, 3
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %for.body
+ %ripple.par.iv15.0.lcssa = phi i32 [ 0, %for.body ], [ %0, %for.end.loopexit ]
+ %add30.ripple.bcast.splatinsert = insertelement <8 x i32> poison, i32 %ripple.par.iv15.0.lcssa, i64 0
+ %add30.ripple.bcast.splat = shufflevector <8 x i32> %add30.ripple.bcast.splatinsert, <8 x i32> poison, <8 x i32> zeroinitializer
+ %add30.ripple.LS.instance = or disjoint <8 x i32> %add30.ripple.bcast.splat, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %cmp32.ripple.LS.instance = icmp ne i32 %ripple.par.iv15.0.lcssa, %W
+ %cmp32.ripple.LS.instance.ripple.bcast.splatinsert = insertelement <8 x i1> poison, i1 %cmp32.ripple.LS.instance, i64 0
+ %cmp32.ripple.LS.instance.ripple.bcast.splat = shufflevector <8 x i1> %cmp32.ripple.LS.instance.ripple.bcast.splatinsert, <8 x i1> poison, <8 x i32> zeroinitializer
+ %cmp33.ripple.vectorized = icmp ult <8 x i32> %add30.ripple.LS.instance, %W.ripple.bcast.splat
+ %or.cond.ripple.LS.instance = select <8 x i1> %cmp32.ripple.LS.instance.ripple.bcast.splat, <8 x i1> %cmp33.ripple.vectorized, <8 x i1> zeroinitializer
+ %or.cond.ripple.LS.instance.ripple.bcast = shufflevector <8 x i1> %or.cond.ripple.LS.instance, <8 x i1> poison, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %or.cond.ripple.LS.instance.ripple.reducelog2.shuffle = shufflevector <8 x i1> %or.cond.ripple.LS.instance, <8 x i1> <i1 poison, i1 poison, i1 poison, i1 poison, i1 poison, i1 poison, i1 poison, i1 false>, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 15>
+ %or.cond.ripple.LS.instance.ripple.reducelog2.operator = or <8 x i1> %or.cond.ripple.LS.instance, %or.cond.ripple.LS.instance.ripple.reducelog2.shuffle
+ %or.cond.ripple.LS.instance.ripple.reducelog2.shuffle189 = shufflevector <8 x i1> %or.cond.ripple.LS.instance.ripple.reducelog2.operator, <8 x i1> <i1 poison, i1 poison, i1 poison, i1 poison, i1 poison, i1 poison, i1 false, i1 false>, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 14, i32 15>
+ %or.cond.ripple.LS.instance.ripple.reducelog2.operator190 = or <8 x i1> %or.cond.ripple.LS.instance.ripple.reducelog2.operator, %or.cond.ripple.LS.instance.ripple.reducelog2.shuffle189
+ %or.cond.ripple.LS.instance.ripple.reducelog2.shuffle191 = shufflevector <8 x i1> %or.cond.ripple.LS.instance.ripple.reducelog2.operator190, <8 x i1> poison, <8 x i32> <i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+ %or.cond.ripple.LS.instance.ripple.reducelog2.operator192 = or <8 x i1> %or.cond.ripple.LS.instance.ripple.reducelog2.operator190, %or.cond.ripple.LS.instance.ripple.reducelog2.shuffle191
+ %ripple.red.extract.ripple.bcast.splat = shufflevector <8 x i1> %or.cond.ripple.LS.instance.ripple.reducelog2.operator192, <8 x i1> poison, <8 x i32> zeroinitializer
+ %arrayidx34.ripple.branch.clone = getelementptr inbounds nuw i8, ptr %aptr, i32 %mul2
+ %.ripple.LS.instance188.ripple.branch.clone.ripple.masked.load = tail call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %arrayidx34.ripple.branch.clone, i32 1, <8 x i1> %ripple.red.extract.ripple.bcast.splat, <8 x i8> poison)
+ %.ripple.LS.instance188.ripple.bcast.ripple.branch.clone = shufflevector <8 x i8> %.ripple.LS.instance188.ripple.branch.clone.ripple.masked.load, <8 x i8> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+ %arrayidx36.ripple.branch.clone = getelementptr inbounds nuw i8, ptr %bptr, i32 %ripple.par.iv15.0.lcssa
+ %.ripple.LS.instance187.ripple.branch.clone.ripple.masked.load = tail call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %arrayidx36.ripple.branch.clone, i32 1, <8 x i1> %or.cond.ripple.LS.instance, <8 x i8> poison)
+ %.ripple.LS.instance187.ripple.bcast.ripple.branch.clone = shufflevector <8 x i8> %.ripple.LS.instance187.ripple.branch.clone.ripple.masked.load, <8 x i8> poison, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %mul38.ripple.LS.instance.ripple.branch.clone = mul <64 x i8> %.ripple.LS.instance187.ripple.bcast.ripple.branch.clone, %.ripple.LS.instance188.ripple.bcast.ripple.branch.clone
+ %mul40.ripple.branch.clone = mul i32 %mul2, %W
+ %1 = getelementptr i8, ptr %cptr, i32 %mul40.ripple.branch.clone
+ %arrayidx42.ripple.branch.clone = getelementptr i8, ptr %1, i32 %ripple.par.iv15.0.lcssa
+ %arrayidx42.ripple.LS.instance.ripple.branch.clone = getelementptr i8, ptr %arrayidx42.ripple.branch.clone, <64 x i32> %arrayidx27.ripple.LS.slope
+ tail call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> %mul38.ripple.LS.instance.ripple.branch.clone, <64 x ptr> %arrayidx42.ripple.LS.instance.ripple.branch.clone, i32 1, <64 x i1> %or.cond.ripple.LS.instance.ripple.bcast)
+ %add48 = add nuw i32 %ripple.par.iv.085, 1
+ %cmp = icmp ult i32 %add48, %div1194
+ br i1 %cmp, label %for.body, label %for.end49
+
+for.end49: ; preds = %for.end, %entry
+ ret void
+}
+
+;; CHECK: outer_product
+;; CHECK: {{r[0-9]+}} = lsr({{r[0-9]+}},#3)
+;; CHECK: {{q[0-9]+}} = vand({{v[0-9]+}},{{r[0-9]+}})
+;; CHECK: {{v[0-9]+}} = vmux(q0,{{v[0-9]+}},{{v[0-9]+}})
+;; CHECK: vmem{{.*}} = {{v[0-9]+}}
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/implicit_def.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/implicit_def.mir
index 4f43780..7e1f5d3 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/implicit_def.mir
+++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/implicit_def.mir
@@ -23,11 +23,11 @@ body: |
bb.1.entry:
; MIPS32-LABEL: name: g_i32
; MIPS32: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; MIPS32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
- ; MIPS32: $a0 = COPY [[DEF]](s32)
- ; MIPS32: JAL @f_i32, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0
- ; MIPS32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
- ; MIPS32: RetRA
+ ; MIPS32-NEXT: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+ ; MIPS32-NEXT: $a0 = COPY [[DEF]](s32)
+ ; MIPS32-NEXT: JAL @f_i32, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0
+ ; MIPS32-NEXT: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+ ; MIPS32-NEXT: RetRA
%0:_(s32) = G_IMPLICIT_DEF
ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
$a0 = COPY %0(s32)
@@ -43,14 +43,13 @@ tracksRegLiveness: true
body: |
bb.1.entry:
; MIPS32-LABEL: name: g_i64
- ; MIPS32: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
- ; MIPS32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
- ; MIPS32: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64)
- ; MIPS32: $a0 = COPY [[UV]](s32)
- ; MIPS32: $a1 = COPY [[UV1]](s32)
- ; MIPS32: JAL @f_i64, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1
- ; MIPS32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
- ; MIPS32: RetRA
+ ; MIPS32: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; MIPS32-NEXT: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+ ; MIPS32-NEXT: $a0 = COPY [[DEF]](s32)
+ ; MIPS32-NEXT: $a1 = COPY [[DEF]](s32)
+ ; MIPS32-NEXT: JAL @f_i64, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1
+ ; MIPS32-NEXT: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+ ; MIPS32-NEXT: RetRA
%0:_(s64) = G_IMPLICIT_DEF
ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
%1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0(s64)
@@ -69,11 +68,11 @@ body: |
bb.1.entry:
; MIPS32-LABEL: name: g_float
; MIPS32: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; MIPS32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
- ; MIPS32: $f12 = COPY [[DEF]](s32)
- ; MIPS32: JAL @f_float, csr_o32, implicit-def $ra, implicit-def $sp, implicit $f12
- ; MIPS32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
- ; MIPS32: RetRA
+ ; MIPS32-NEXT: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+ ; MIPS32-NEXT: $f12 = COPY [[DEF]](s32)
+ ; MIPS32-NEXT: JAL @f_float, csr_o32, implicit-def $ra, implicit-def $sp, implicit $f12
+ ; MIPS32-NEXT: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+ ; MIPS32-NEXT: RetRA
%0:_(s32) = G_IMPLICIT_DEF
ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
$f12 = COPY %0(s32)
@@ -90,11 +89,11 @@ body: |
bb.1.entry:
; MIPS32-LABEL: name: g_double
; MIPS32: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
- ; MIPS32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
- ; MIPS32: $d6 = COPY [[DEF]](s64)
- ; MIPS32: JAL @f_double, csr_o32, implicit-def $ra, implicit-def $sp, implicit $d6
- ; MIPS32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
- ; MIPS32: RetRA
+ ; MIPS32-NEXT: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+ ; MIPS32-NEXT: $d6 = COPY [[DEF]](s64)
+ ; MIPS32-NEXT: JAL @f_double, csr_o32, implicit-def $ra, implicit-def $sp, implicit $d6
+ ; MIPS32-NEXT: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+ ; MIPS32-NEXT: RetRA
%0:_(s64) = G_IMPLICIT_DEF
ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
$d6 = COPY %0(s64)
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/implicit_def.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/implicit_def.ll
index 7c94a5b..c8f00c5 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/implicit_def.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/implicit_def.ll
@@ -29,8 +29,8 @@ define void @g_i64() {
; MIPS32-NEXT: .cfi_def_cfa_offset 24
; MIPS32-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill
; MIPS32-NEXT: .cfi_offset 31, -4
-; MIPS32-NEXT: # implicit-def: $a0
; MIPS32-NEXT: # implicit-def: $a1
+; MIPS32-NEXT: move $4, $5
; MIPS32-NEXT: jal f_i64
; MIPS32-NEXT: nop
; MIPS32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/PowerPC/bittest.ll b/llvm/test/CodeGen/PowerPC/bittest.ll
new file mode 100644
index 0000000..cba56e3
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/bittest.ll
@@ -0,0 +1,193 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs < %s -O3 -mcpu=ppc -mtriple powerpc-ibm-aix \
+; RUN: -ppc-asm-full-reg-names | FileCheck %s
+
+define i32 @foo(i32 noundef signext %x) {
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: stwu r1, -64(r1)
+; CHECK-NEXT: stw r0, 72(r1)
+; CHECK-NEXT: cmpwi r3, 8
+; CHECK-NEXT: stw r31, 60(r1) # 4-byte Folded Spill
+; CHECK-NEXT: mr r31, r3
+; CHECK-NEXT: li r3, 0
+; CHECK-NEXT: ble cr0, L..BB0_4
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: cmpwi r31, 11
+; CHECK-NEXT: bge cr0, L..BB0_7
+; CHECK-NEXT: # %bb.2: # %entry
+; CHECK-NEXT: cmplwi r31, 9
+; CHECK-NEXT: beq cr0, L..BB0_9
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: cmplwi r31, 10
+; CHECK-NEXT: beq cr0, L..BB0_11
+; CHECK-NEXT: b L..BB0_13
+; CHECK-NEXT: L..BB0_4: # %entry
+; CHECK-NEXT: cmplwi r31, 4
+; CHECK-NEXT: beq cr0, L..BB0_12
+; CHECK-NEXT: # %bb.5: # %entry
+; CHECK-NEXT: cmplwi r31, 7
+; CHECK-NEXT: beq cr0, L..BB0_11
+; CHECK-NEXT: # %bb.6: # %entry
+; CHECK-NEXT: cmplwi r31, 8
+; CHECK-NEXT: beq cr0, L..BB0_10
+; CHECK-NEXT: b L..BB0_13
+; CHECK-NEXT: L..BB0_7: # %entry
+; CHECK-NEXT: beq cr0, L..BB0_10
+; CHECK-NEXT: # %bb.8: # %entry
+; CHECK-NEXT: cmplwi r31, 12
+; CHECK-NEXT: bne cr0, L..BB0_13
+; CHECK-NEXT: L..BB0_9: # %sw.bb2
+; CHECK-NEXT: mr r3, r31
+; CHECK-NEXT: bl .foo3[PR]
+; CHECK-NEXT: nop
+; CHECK-NEXT: mr r3, r31
+; CHECK-NEXT: b L..BB0_13
+; CHECK-NEXT: L..BB0_10: # %sw.bb1
+; CHECK-NEXT: mr r3, r31
+; CHECK-NEXT: bl .foo2[PR]
+; CHECK-NEXT: nop
+; CHECK-NEXT: mr r3, r31
+; CHECK-NEXT: b L..BB0_13
+; CHECK-NEXT: L..BB0_11: # %sw.bb
+; CHECK-NEXT: mr r3, r31
+; CHECK-NEXT: bl .foo1[PR]
+; CHECK-NEXT: nop
+; CHECK-NEXT: mr r3, r31
+; CHECK-NEXT: b L..BB0_13
+; CHECK-NEXT: L..BB0_12: # %sw.bb3
+; CHECK-NEXT: li r3, 4
+; CHECK-NEXT: bl .foo4[PR]
+; CHECK-NEXT: nop
+; CHECK-NEXT: li r3, 4
+; CHECK-NEXT: L..BB0_13: # %return
+; CHECK-NEXT: lwz r31, 60(r1) # 4-byte Folded Reload
+; CHECK-NEXT: addi r1, r1, 64
+; CHECK-NEXT: lwz r0, 8(r1)
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+entry:
+ switch i32 %x, label %return [
+ i32 7, label %sw.bb
+ i32 10, label %sw.bb
+ i32 8, label %sw.bb1
+ i32 11, label %sw.bb1
+ i32 9, label %sw.bb2
+ i32 12, label %sw.bb2
+ i32 4, label %sw.bb3
+ ]
+
+sw.bb: ; preds = %entry, %entry
+ tail call void @foo1(i32 noundef signext %x)
+ br label %return
+
+sw.bb1: ; preds = %entry, %entry
+ tail call void @foo2(i32 noundef signext %x)
+ br label %return
+
+sw.bb2: ; preds = %entry, %entry
+ tail call void @foo3(i32 noundef signext %x)
+ br label %return
+
+sw.bb3: ; preds = %entry
+ tail call void @foo4(i32 noundef signext 4)
+ br label %return
+
+return: ; preds = %sw.bb, %sw.bb1, %sw.bb2, %sw.bb3, %entry
+ %retval.0 = phi i32 [ 0, %entry ], [ 4, %sw.bb3 ], [ %x, %sw.bb2 ], [ %x, %sw.bb1 ], [ %x, %sw.bb ]
+ ret i32 %retval.0
+}
+
+define i32 @goo(i32 noundef signext %x) {
+; CHECK-LABEL: goo:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: stwu r1, -64(r1)
+; CHECK-NEXT: stw r0, 72(r1)
+; CHECK-NEXT: cmplwi r3, 12
+; CHECK-NEXT: stw r31, 60(r1) # 4-byte Folded Spill
+; CHECK-NEXT: mr r31, r3
+; CHECK-NEXT: bgt cr0, L..BB1_7
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: li r3, 1
+; CHECK-NEXT: slw r3, r3, r31
+; CHECK-NEXT: andi. r4, r3, 5632
+; CHECK-NEXT: bne cr0, L..BB1_4
+; CHECK-NEXT: # %bb.2: # %entry
+; CHECK-NEXT: andi. r3, r3, 2304
+; CHECK-NEXT: beq cr0, L..BB1_5
+; CHECK-NEXT: # %bb.3: # %sw.bb1
+; CHECK-NEXT: mr r3, r31
+; CHECK-NEXT: bl .foo2[PR]
+; CHECK-NEXT: nop
+; CHECK-NEXT: b L..BB1_9
+; CHECK-NEXT: L..BB1_4: # %sw.bb2
+; CHECK-NEXT: mr r3, r31
+; CHECK-NEXT: bl .foo3[PR]
+; CHECK-NEXT: nop
+; CHECK-NEXT: b L..BB1_9
+; CHECK-NEXT: L..BB1_5: # %entry
+; CHECK-NEXT: cmplwi r31, 7
+; CHECK-NEXT: bne cr0, L..BB1_7
+; CHECK-NEXT: # %bb.6: # %sw.bb
+; CHECK-NEXT: li r3, 7
+; CHECK-NEXT: li r31, 7
+; CHECK-NEXT: bl .foo1[PR]
+; CHECK-NEXT: nop
+; CHECK-NEXT: b L..BB1_9
+; CHECK-NEXT: L..BB1_7: # %entry
+; CHECK-NEXT: cmplwi r31, 4
+; CHECK-NEXT: li r31, 0
+; CHECK-NEXT: bne cr0, L..BB1_9
+; CHECK-NEXT: # %bb.8: # %sw.bb3
+; CHECK-NEXT: li r3, 4
+; CHECK-NEXT: li r31, 4
+; CHECK-NEXT: bl .foo4[PR]
+; CHECK-NEXT: nop
+; CHECK-NEXT: L..BB1_9: # %return
+; CHECK-NEXT: mr r3, r31
+; CHECK-NEXT: lwz r31, 60(r1) # 4-byte Folded Reload
+; CHECK-NEXT: addi r1, r1, 64
+; CHECK-NEXT: lwz r0, 8(r1)
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+entry:
+ switch i32 %x, label %return [
+ i32 7, label %sw.bb
+ i32 8, label %sw.bb1
+ i32 11, label %sw.bb1
+ i32 9, label %sw.bb2
+ i32 10, label %sw.bb2
+ i32 12, label %sw.bb2
+ i32 4, label %sw.bb3
+ ]
+
+sw.bb: ; preds = %entry
+ tail call void @foo1(i32 noundef signext 7)
+ br label %return
+
+sw.bb1: ; preds = %entry, %entry
+ tail call void @foo2(i32 noundef signext %x)
+ br label %return
+
+sw.bb2: ; preds = %entry, %entry, %entry
+ tail call void @foo3(i32 noundef signext %x)
+ br label %return
+
+sw.bb3: ; preds = %entry
+ tail call void @foo4(i32 noundef signext 4)
+ br label %return
+
+return: ; preds = %sw.bb, %sw.bb1, %sw.bb2, %sw.bb3, %entry
+ %retval.0 = phi i32 [ 0, %entry ], [ 4, %sw.bb3 ], [ %x, %sw.bb2 ], [ %x, %sw.bb1 ], [ 7, %sw.bb ]
+ ret i32 %retval.0
+}
+
+declare void @foo1(i32 noundef signext)
+
+declare void @foo2(i32 noundef signext)
+
+declare void @foo3(i32 noundef signext)
+
+declare void @foo4(i32 noundef signext)
diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-undef-vec-scaling.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-undef-vec-scaling.mir
index b02832b..b996217 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/legalize-undef-vec-scaling.mir
+++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-undef-vec-scaling.mir
@@ -1,18 +1,26 @@
-# RUN: llc -mtriple=x86_64-linux-gnu -mattr=avx2 -run-pass=legalizer -global-isel-abort=2 -pass-remarks-missed='gisel*' %s -o - | FileCheck %s --check-prefixes=CHECK,AVX2
-# RUN: llc -mtriple=x86_64-linux-gnu -mattr=sse2 -run-pass=legalizer -global-isel-abort=2 -pass-remarks-missed='gisel*' %s -o - | FileCheck %s --check-prefixes=CHECK,SSE2
-# RUN: llc -mtriple=x86_64-linux-gnu -mattr=avx512f -run-pass=legalizer -global-isel-abort=2 -pass-remarks-missed='gisel*' %s -o - | FileCheck %s --check-prefixes=CHECK,AVX512F
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=avx2 -run-pass=legalizer -global-isel-abort=2 -pass-remarks-missed='gisel*' %s -o - | FileCheck %s --check-prefixes=AVX2
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=sse2 -run-pass=legalizer -global-isel-abort=2 -pass-remarks-missed='gisel*' %s -o - | FileCheck %s --check-prefixes=SSE2
+# RUN: llc -mtriple=x86_64-linux-gnu -mattr=avx512f -run-pass=legalizer -global-isel-abort=2 -pass-remarks-missed='gisel*' %s -o - | FileCheck %s --check-prefixes=AVX512F
---
name: test_basic_g_implicit_def_v8i64
body: |
bb.0:
- ; CHECK-LABEL: name: test_basic_g_implicit_def_v8i64
- ; AVX512F: {{%[0-9]+}}:_(<8 x s64>) = G_IMPLICIT_DEF
- ; AVX2: [[DEF_AVX2:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; AVX2-NEXT: {{%[0-9]+}}:_(<8 x s64>) = G_CONCAT_VECTORS [[DEF_AVX2]](<4 x s64>), [[DEF_AVX2]](<4 x s64>)
- ; SSE2: [[DEF_SSE2:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
- ; SSE2-NEXT: {{%[0-9]+}}:_(<8 x s64>) = G_CONCAT_VECTORS [[DEF_SSE2]](<2 x s64>), [[DEF_SSE2]](<2 x s64>), [[DEF_SSE2]](<2 x s64>), [[DEF_SSE2]](<2 x s64>)
+ ; AVX2-LABEL: name: test_basic_g_implicit_def_v8i64
+ ; AVX2: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+ ; AVX2-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s64>) = G_CONCAT_VECTORS [[DEF]](<4 x s64>), [[DEF]](<4 x s64>)
+ ; AVX2-NEXT: RET 0, implicit [[CONCAT_VECTORS]](<8 x s64>)
+ ;
+ ; SSE2-LABEL: name: test_basic_g_implicit_def_v8i64
+ ; SSE2: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
+ ; SSE2-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s64>) = G_CONCAT_VECTORS [[DEF]](<2 x s64>), [[DEF]](<2 x s64>), [[DEF]](<2 x s64>), [[DEF]](<2 x s64>)
+ ; SSE2-NEXT: RET 0, implicit [[CONCAT_VECTORS]](<8 x s64>)
+ ;
+ ; AVX512F-LABEL: name: test_basic_g_implicit_def_v8i64
+ ; AVX512F: [[DEF:%[0-9]+]]:_(<8 x s64>) = G_IMPLICIT_DEF
+ ; AVX512F-NEXT: RET 0, implicit [[DEF]](<8 x s64>)
%0:_(<8 x s64>) = G_IMPLICIT_DEF
RET 0, implicit %0
...
@@ -21,10 +29,36 @@ body: |
name: test_g_implicit_def_cample_size
body: |
bb.1:
- ; CHECK-LABEL: name: test_g_implicit_def_cample_size
- ; AVX512: {{%[0-9]+}}:_(<8 x s64>) = G_IMPLICIT_DEF
- ; AVX2: {{%[0-9]+}}:_(<4 x s64>) = G_IMPLICIT_DEF
- ; SSE2: {{%[0-9]+}}:_(<2 x s64>) = G_IMPLICIT_DEF
+ ; AVX2-LABEL: name: test_g_implicit_def_cample_size
+ ; AVX2: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; AVX2-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s64>) = G_BUILD_VECTOR [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64)
+ ; AVX2-NEXT: [[TRUNC:%[0-9]+]]:_(<5 x s63>) = G_TRUNC [[BUILD_VECTOR]](<5 x s64>)
+ ; AVX2-NEXT: RET 0, implicit [[TRUNC]](<5 x s63>)
+ ;
+ ; SSE2-LABEL: name: test_g_implicit_def_cample_size
+ ; SSE2: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; SSE2-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s64>) = G_BUILD_VECTOR [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64)
+ ; SSE2-NEXT: [[TRUNC:%[0-9]+]]:_(<5 x s63>) = G_TRUNC [[BUILD_VECTOR]](<5 x s64>)
+ ; SSE2-NEXT: RET 0, implicit [[TRUNC]](<5 x s63>)
+ ;
+ ; AVX512F-LABEL: name: test_g_implicit_def_cample_size
+ ; AVX512F: [[CONSTANT_POOL:%[0-9]+]]:_(p0) = G_CONSTANT_POOL %const.0
+ ; AVX512F-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[CONSTANT_POOL]](p0) :: (load (s64) from constant-pool, align 64)
+ ; AVX512F-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+ ; AVX512F-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[CONSTANT_POOL]], [[C]](s64)
+ ; AVX512F-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from constant-pool + 8, basealign 64)
+ ; AVX512F-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+ ; AVX512F-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[CONSTANT_POOL]], [[C1]](s64)
+ ; AVX512F-NEXT: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p0) :: (load (s64) from constant-pool + 16, align 16, basealign 64)
+ ; AVX512F-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
+ ; AVX512F-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[CONSTANT_POOL]], [[C2]](s64)
+ ; AVX512F-NEXT: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD2]](p0) :: (load (s64) from constant-pool + 24, basealign 64)
+ ; AVX512F-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+ ; AVX512F-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[CONSTANT_POOL]], [[C3]](s64)
+ ; AVX512F-NEXT: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD3]](p0) :: (load (s64) from constant-pool + 32, align 32, basealign 64)
+ ; AVX512F-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64), [[LOAD3]](s64), [[LOAD4]](s64)
+ ; AVX512F-NEXT: [[TRUNC:%[0-9]+]]:_(<5 x s63>) = G_TRUNC [[BUILD_VECTOR]](<5 x s64>)
+ ; AVX512F-NEXT: RET 0, implicit [[TRUNC]](<5 x s63>)
%0:_(<5 x s63>) = G_IMPLICIT_DEF
RET 0, implicit %0
...
diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-undef.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-undef.mir
index b16fe3e..03f1be6 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/legalize-undef.mir
+++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-undef.mir
@@ -32,12 +32,10 @@ body: |
; X32-NEXT: G_STORE [[DEF2]](s16), [[DEF]](p0) :: (store (s16))
; X32-NEXT: [[DEF3:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; X32-NEXT: G_STORE [[DEF3]](s32), [[DEF]](p0) :: (store (s32))
- ; X32-NEXT: [[DEF4:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
- ; X32-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF4]](s64)
- ; X32-NEXT: G_STORE [[UV]](s32), [[DEF]](p0) :: (store (s32), align 8)
+ ; X32-NEXT: G_STORE [[DEF3]](s32), [[DEF]](p0) :: (store (s32), align 8)
; X32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; X32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD [[DEF]], [[C1]](s32)
- ; X32-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD]](p0) :: (store (s32) into unknown-address + 4)
+ ; X32-NEXT: G_STORE [[DEF3]](s32), [[PTR_ADD]](p0) :: (store (s32) into unknown-address + 4)
%5:_(p0) = G_IMPLICIT_DEF
%0:_(s1) = G_IMPLICIT_DEF
G_STORE %0, %5 ::(store (s1))
diff --git a/llvm/test/CodeGen/X86/bfloat-calling-conv.ll b/llvm/test/CodeGen/X86/bfloat-calling-conv.ll
index ea4d32b..d087491 100644
--- a/llvm/test/CodeGen/X86/bfloat-calling-conv.ll
+++ b/llvm/test/CodeGen/X86/bfloat-calling-conv.ll
@@ -660,8 +660,7 @@ define <3 x bfloat> @call_ret_v3bf16(ptr %ptr) #0 {
; SSE2-LABEL: call_ret_v3bf16:
; SSE2: # %bb.0:
; SSE2-NEXT: pushq %rax
-; SSE2-NEXT: movl 4(%rdi), %eax
-; SSE2-NEXT: pinsrw $0, %eax, %xmm1
+; SSE2-NEXT: pinsrw $0, 4(%rdi), %xmm1
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: callq returns_v3bf16@PLT
@@ -725,8 +724,7 @@ define <3 x bfloat> @call_ret_v3bf16(ptr %ptr) #0 {
; AVXNECONVERT-LABEL: call_ret_v3bf16:
; AVXNECONVERT: # %bb.0:
; AVXNECONVERT-NEXT: pushq %rax
-; AVXNECONVERT-NEXT: movl 4(%rdi), %eax
-; AVXNECONVERT-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
+; AVXNECONVERT-NEXT: vpinsrw $0, 4(%rdi), %xmm0, %xmm0
; AVXNECONVERT-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVXNECONVERT-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero
; AVXNECONVERT-NEXT: callq returns_v3bf16@PLT
diff --git a/llvm/test/CodeGen/X86/bittest-big-integer.ll b/llvm/test/CodeGen/X86/bittest-big-integer.ll
new file mode 100644
index 0000000..19d751d1
--- /dev/null
+++ b/llvm/test/CodeGen/X86/bittest-big-integer.ll
@@ -0,0 +1,7027 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=X64,SSE
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=X64,SSE
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=X64,AVX,AVX2
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=X64,AVX,AVX512
+
+; bt/btc/btr/bts patterns + 'init' to set single bit value in large integers
+
+;
+; i32 bt/btc/btr/bts + init (reference)
+;
+
+define i1 @test_eq_i32(ptr %word, i32 %position) nounwind {
+; X86-LABEL: test_eq_i32:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl (%eax), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: btl %ecx, %eax
+; X86-NEXT: setae %al
+; X86-NEXT: retl
+;
+; X64-LABEL: test_eq_i32:
+; X64: # %bb.0:
+; X64-NEXT: movl (%rdi), %eax
+; X64-NEXT: btl %esi, %eax
+; X64-NEXT: setae %al
+; X64-NEXT: retq
+ %rem = and i32 %position, 31
+ %bit = shl nuw i32 1, %rem
+ %ld = load i32, ptr %word
+ %test = and i32 %ld, %bit
+ %cmp = icmp eq i32 %test, 0
+ ret i1 %cmp
+}
+
+define i1 @complement_ne_i32(ptr %word, i32 %position) nounwind {
+; X86-LABEL: complement_ne_i32:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl (%ecx), %edx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: btcl %eax, %esi
+; X86-NEXT: btl %eax, %edx
+; X86-NEXT: setb %al
+; X86-NEXT: movl %esi, (%ecx)
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+;
+; X64-LABEL: complement_ne_i32:
+; X64: # %bb.0:
+; X64-NEXT: movl (%rdi), %eax
+; X64-NEXT: movl %eax, %ecx
+; X64-NEXT: btcl %esi, %ecx
+; X64-NEXT: btl %esi, %eax
+; X64-NEXT: setb %al
+; X64-NEXT: movl %ecx, (%rdi)
+; X64-NEXT: retq
+ %ofs = and i32 %position, 31
+ %bit = shl nuw i32 1, %ofs
+ %ld = load i32, ptr %word
+ %test = and i32 %ld, %bit
+ %res = xor i32 %ld, %bit
+ %cmp = icmp ne i32 %test, 0
+ store i32 %res, ptr %word
+ ret i1 %cmp
+}
+
+define i1 @reset_eq_i32(ptr %word, i32 %position) nounwind {
+; X86-LABEL: reset_eq_i32:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl (%ecx), %edx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: btrl %eax, %esi
+; X86-NEXT: btl %eax, %edx
+; X86-NEXT: setae %al
+; X86-NEXT: movl %esi, (%ecx)
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+;
+; X64-LABEL: reset_eq_i32:
+; X64: # %bb.0:
+; X64-NEXT: movl (%rdi), %eax
+; X64-NEXT: movl %eax, %ecx
+; X64-NEXT: btrl %esi, %ecx
+; X64-NEXT: btl %esi, %eax
+; X64-NEXT: setae %al
+; X64-NEXT: movl %ecx, (%rdi)
+; X64-NEXT: retq
+ %ofs = and i32 %position, 31
+ %bit = shl nuw i32 1, %ofs
+ %mask = xor i32 %bit, -1
+ %ld = load i32, ptr %word
+ %test = and i32 %ld, %bit
+ %res = and i32 %ld, %mask
+ %cmp = icmp eq i32 %test, 0
+ store i32 %res, ptr %word
+ ret i1 %cmp
+}
+
+define i1 @set_ne_i32(ptr %word, i32 %position) nounwind {
+; X86-LABEL: set_ne_i32:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl (%ecx), %edx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: btsl %eax, %esi
+; X86-NEXT: btl %eax, %edx
+; X86-NEXT: setb %al
+; X86-NEXT: movl %esi, (%ecx)
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+;
+; X64-LABEL: set_ne_i32:
+; X64: # %bb.0:
+; X64-NEXT: movl (%rdi), %eax
+; X64-NEXT: movl %eax, %ecx
+; X64-NEXT: btsl %esi, %ecx
+; X64-NEXT: btl %esi, %eax
+; X64-NEXT: setb %al
+; X64-NEXT: movl %ecx, (%rdi)
+; X64-NEXT: retq
+ %ofs = and i32 %position, 31
+ %bit = shl nuw i32 1, %ofs
+ %ld = load i32, ptr %word
+ %test = and i32 %ld, %bit
+ %res = or i32 %ld, %bit
+ %cmp = icmp ne i32 %test, 0
+ store i32 %res, ptr %word
+ ret i1 %cmp
+}
+
+define i1 @init_eq_i32(ptr %word, i32 %position, i1 zeroext %value) nounwind {
+; X86-LABEL: init_eq_i32:
+; X86: # %bb.0:
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: shll %cl, %eax
+; X86-NEXT: movl (%edx), %esi
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: btrl %ecx, %edi
+; X86-NEXT: orl %eax, %edi
+; X86-NEXT: btl %ecx, %esi
+; X86-NEXT: setae %al
+; X86-NEXT: movl %edi, (%edx)
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: retl
+;
+; SSE-LABEL: init_eq_i32:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %esi, %ecx
+; SSE-NEXT: shll %cl, %edx
+; SSE-NEXT: movl (%rdi), %eax
+; SSE-NEXT: movl %eax, %esi
+; SSE-NEXT: btrl %ecx, %esi
+; SSE-NEXT: orl %edx, %esi
+; SSE-NEXT: btl %ecx, %eax
+; SSE-NEXT: setae %al
+; SSE-NEXT: movl %esi, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: init_eq_i32:
+; AVX: # %bb.0:
+; AVX-NEXT: shlxl %esi, %edx, %eax
+; AVX-NEXT: movl (%rdi), %ecx
+; AVX-NEXT: movl %ecx, %edx
+; AVX-NEXT: btrl %esi, %edx
+; AVX-NEXT: orl %eax, %edx
+; AVX-NEXT: btl %esi, %ecx
+; AVX-NEXT: setae %al
+; AVX-NEXT: movl %edx, (%rdi)
+; AVX-NEXT: retq
+ %ofs = and i32 %position, 31
+ %bit = shl nuw i32 1, %ofs
+ %mask = xor i32 %bit, -1
+ %val0 = zext i1 %value to i32
+ %val = shl nuw i32 %val0, %ofs
+ %ld = load i32, ptr %word
+ %test = and i32 %ld, %bit
+ %res0 = and i32 %ld, %mask
+ %res = or i32 %res0, %val
+ %cmp = icmp eq i32 %test, 0
+ store i32 %res, ptr %word
+ ret i1 %cmp
+}
+
+;
+; i64 bt/btc/btr/bts + init
+;
+
+define i1 @test_ne_i64(ptr %word, i32 %position) nounwind {
+; X86-LABEL: test_ne_i64:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $1, %edx
+; X86-NEXT: xorl %esi, %esi
+; X86-NEXT: shldl %cl, %edx, %esi
+; X86-NEXT: shll %cl, %edx
+; X86-NEXT: testb $32, %cl
+; X86-NEXT: je .LBB5_2
+; X86-NEXT: # %bb.1:
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: .LBB5_2:
+; X86-NEXT: andl 4(%eax), %esi
+; X86-NEXT: andl (%eax), %edx
+; X86-NEXT: orl %esi, %edx
+; X86-NEXT: setne %al
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+;
+; X64-LABEL: test_ne_i64:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: movq (%rdi), %rax
+; X64-NEXT: btq %rsi, %rax
+; X64-NEXT: setb %al
+; X64-NEXT: retq
+ %rem = and i32 %position, 63
+ %ofs = zext nneg i32 %rem to i64
+ %bit = shl nuw i64 1, %ofs
+ %ld = load i64, ptr %word
+ %test = and i64 %ld, %bit
+ %cmp = icmp ne i64 %test, 0
+ ret i1 %cmp
+}
+
+define i1 @complement_ne_i64(ptr %word, i32 %position) nounwind {
+; X86-LABEL: complement_ne_i64:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $1, %eax
+; X86-NEXT: xorl %esi, %esi
+; X86-NEXT: shldl %cl, %eax, %esi
+; X86-NEXT: shll %cl, %eax
+; X86-NEXT: testb $32, %cl
+; X86-NEXT: je .LBB6_2
+; X86-NEXT: # %bb.1:
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: .LBB6_2:
+; X86-NEXT: movl (%edx), %ecx
+; X86-NEXT: movl 4(%edx), %edi
+; X86-NEXT: movl %edi, %ebx
+; X86-NEXT: andl %esi, %ebx
+; X86-NEXT: movl %ecx, %ebp
+; X86-NEXT: andl %eax, %ebp
+; X86-NEXT: xorl %esi, %edi
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: orl %ebx, %ebp
+; X86-NEXT: setne %al
+; X86-NEXT: movl %ecx, (%edx)
+; X86-NEXT: movl %edi, 4(%edx)
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; X64-LABEL: complement_ne_i64:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: movq (%rdi), %rax
+; X64-NEXT: movq %rax, %rcx
+; X64-NEXT: btcq %rsi, %rcx
+; X64-NEXT: btq %rsi, %rax
+; X64-NEXT: setb %al
+; X64-NEXT: movq %rcx, (%rdi)
+; X64-NEXT: retq
+ %rem = and i32 %position, 63
+ %ofs = zext nneg i32 %rem to i64
+ %bit = shl nuw i64 1, %ofs
+ %ld = load i64, ptr %word
+ %test = and i64 %ld, %bit
+ %res = xor i64 %ld, %bit
+ %cmp = icmp ne i64 %test, 0
+ store i64 %res, ptr %word
+ ret i1 %cmp
+}
+
+define i1 @reset_eq_i64(ptr %word, i32 %position) nounwind {
+; X86-LABEL: reset_eq_i64:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $1, %esi
+; X86-NEXT: xorl %edi, %edi
+; X86-NEXT: shldl %cl, %esi, %edi
+; X86-NEXT: shll %cl, %esi
+; X86-NEXT: testb $32, %cl
+; X86-NEXT: je .LBB7_2
+; X86-NEXT: # %bb.1:
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: xorl %esi, %esi
+; X86-NEXT: .LBB7_2:
+; X86-NEXT: movl (%edx), %eax
+; X86-NEXT: movl 4(%edx), %ecx
+; X86-NEXT: movl %ecx, %ebx
+; X86-NEXT: andl %edi, %ebx
+; X86-NEXT: notl %edi
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: andl %esi, %ebp
+; X86-NEXT: notl %esi
+; X86-NEXT: andl %ecx, %edi
+; X86-NEXT: andl %eax, %esi
+; X86-NEXT: orl %ebx, %ebp
+; X86-NEXT: sete %al
+; X86-NEXT: movl %esi, (%edx)
+; X86-NEXT: movl %edi, 4(%edx)
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; X64-LABEL: reset_eq_i64:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: movq (%rdi), %rax
+; X64-NEXT: movq %rax, %rcx
+; X64-NEXT: btrq %rsi, %rcx
+; X64-NEXT: btq %rsi, %rax
+; X64-NEXT: setae %al
+; X64-NEXT: movq %rcx, (%rdi)
+; X64-NEXT: retq
+ %rem = and i32 %position, 63
+ %ofs = zext nneg i32 %rem to i64
+ %bit = shl nuw i64 1, %ofs
+ %mask = xor i64 %bit, -1
+ %ld = load i64, ptr %word
+ %test = and i64 %ld, %bit
+ %res = and i64 %ld, %mask
+ %cmp = icmp eq i64 %test, 0
+ store i64 %res, ptr %word
+ ret i1 %cmp
+}
+
+define i1 @set_ne_i64(ptr %word, i32 %position) nounwind {
+; X86-LABEL: set_ne_i64:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $1, %eax
+; X86-NEXT: xorl %esi, %esi
+; X86-NEXT: shldl %cl, %eax, %esi
+; X86-NEXT: shll %cl, %eax
+; X86-NEXT: testb $32, %cl
+; X86-NEXT: je .LBB8_2
+; X86-NEXT: # %bb.1:
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: .LBB8_2:
+; X86-NEXT: movl (%edx), %ecx
+; X86-NEXT: movl 4(%edx), %edi
+; X86-NEXT: movl %edi, %ebx
+; X86-NEXT: andl %esi, %ebx
+; X86-NEXT: movl %ecx, %ebp
+; X86-NEXT: andl %eax, %ebp
+; X86-NEXT: orl %esi, %edi
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: orl %ebx, %ebp
+; X86-NEXT: setne %al
+; X86-NEXT: movl %ecx, (%edx)
+; X86-NEXT: movl %edi, 4(%edx)
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; X64-LABEL: set_ne_i64:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: movq (%rdi), %rax
+; X64-NEXT: movq %rax, %rcx
+; X64-NEXT: btsq %rsi, %rcx
+; X64-NEXT: btq %rsi, %rax
+; X64-NEXT: setb %al
+; X64-NEXT: movq %rcx, (%rdi)
+; X64-NEXT: retq
+ %rem = and i32 %position, 63
+ %ofs = zext nneg i32 %rem to i64
+ %bit = shl nuw i64 1, %ofs
+ %ld = load i64, ptr %word
+ %test = and i64 %ld, %bit
+ %res = or i64 %ld, %bit
+ %cmp = icmp ne i64 %test, 0
+ store i64 %res, ptr %word
+ ret i1 %cmp
+}
+
+define i1 @init_eq_i64(ptr %word, i32 %position, i1 zeroext %value) nounwind {
+; X86-LABEL: init_eq_i64:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $1, %eax
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: shldl %cl, %eax, %edx
+; X86-NEXT: shll %cl, %eax
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: xorl %edi, %edi
+; X86-NEXT: shldl %cl, %esi, %edi
+; X86-NEXT: shll %cl, %esi
+; X86-NEXT: testb $32, %cl
+; X86-NEXT: je .LBB9_2
+; X86-NEXT: # %bb.1:
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: .LBB9_2:
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: notl %ebx
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: notl %ebp
+; X86-NEXT: je .LBB9_4
+; X86-NEXT: # %bb.3:
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: xorl %esi, %esi
+; X86-NEXT: .LBB9_4:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl 4(%ecx), %ecx
+; X86-NEXT: andl %ecx, %edx
+; X86-NEXT: andl %ecx, %ebx
+; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl (%edi), %ecx
+; X86-NEXT: andl %ecx, %eax
+; X86-NEXT: andl %ecx, %ebp
+; X86-NEXT: orl %esi, %ebp
+; X86-NEXT: orl %edx, %eax
+; X86-NEXT: movl %ebp, (%edi)
+; X86-NEXT: movl %ebx, 4(%edi)
+; X86-NEXT: sete %al
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; SSE-LABEL: init_eq_i64:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %esi, %ecx
+; SSE-NEXT: movl %edx, %eax
+; SSE-NEXT: shlq %cl, %rax
+; SSE-NEXT: movq (%rdi), %rdx
+; SSE-NEXT: movq %rdx, %rsi
+; SSE-NEXT: btrq %rcx, %rsi
+; SSE-NEXT: orq %rax, %rsi
+; SSE-NEXT: btq %rcx, %rdx
+; SSE-NEXT: setae %al
+; SSE-NEXT: movq %rsi, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: init_eq_i64:
+; AVX: # %bb.0:
+; AVX-NEXT: # kill: def $esi killed $esi def $rsi
+; AVX-NEXT: movl %edx, %eax
+; AVX-NEXT: shlxq %rsi, %rax, %rax
+; AVX-NEXT: movq (%rdi), %rcx
+; AVX-NEXT: movq %rcx, %rdx
+; AVX-NEXT: btrq %rsi, %rdx
+; AVX-NEXT: orq %rax, %rdx
+; AVX-NEXT: btq %rsi, %rcx
+; AVX-NEXT: setae %al
+; AVX-NEXT: movq %rdx, (%rdi)
+; AVX-NEXT: retq
+ %rem = and i32 %position, 63
+ %ofs = zext nneg i32 %rem to i64
+ %bit = shl nuw i64 1, %ofs
+ %mask = xor i64 %bit, -1
+ %val0 = zext i1 %value to i64
+ %val = shl nuw i64 %val0, %ofs
+ %ld = load i64, ptr %word
+ %test = and i64 %ld, %bit
+ %res0 = and i64 %ld, %mask
+ %res = or i64 %res0, %val
+ %cmp = icmp eq i64 %test, 0
+ store i64 %res, ptr %word
+ ret i1 %cmp
+}
+
+;
+; i128
+;
+
+define i1 @test_ne_i128(ptr %word, i32 %position) nounwind {
+; X86-LABEL: test_ne_i128:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $48, %esp
+; X86-NEXT: movzbl 12(%ebp), %ecx
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, (%esp)
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: shrb $3, %al
+; X86-NEXT: andb $12, %al
+; X86-NEXT: negb %al
+; X86-NEXT: movsbl %al, %esi
+; X86-NEXT: movl 24(%esp,%esi), %edi
+; X86-NEXT: movl 28(%esp,%esi), %eax
+; X86-NEXT: shldl %cl, %edi, %eax
+; X86-NEXT: movl 16(%esp,%esi), %edx
+; X86-NEXT: movl 20(%esp,%esi), %esi
+; X86-NEXT: shldl %cl, %esi, %edi
+; X86-NEXT: shldl %cl, %edx, %esi
+; X86-NEXT: movl 8(%ebp), %ebx
+; X86-NEXT: shll %cl, %edx
+; X86-NEXT: andl 8(%ebx), %edi
+; X86-NEXT: andl (%ebx), %edx
+; X86-NEXT: orl %edi, %edx
+; X86-NEXT: andl 12(%ebx), %eax
+; X86-NEXT: andl 4(%ebx), %esi
+; X86-NEXT: orl %eax, %esi
+; X86-NEXT: orl %edx, %esi
+; X86-NEXT: setne %al
+; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; SSE-LABEL: test_ne_i128:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %esi, %ecx
+; SSE-NEXT: movl $1, %eax
+; SSE-NEXT: xorl %edx, %edx
+; SSE-NEXT: shldq %cl, %rax, %rdx
+; SSE-NEXT: xorl %esi, %esi
+; SSE-NEXT: shlq %cl, %rax
+; SSE-NEXT: testb $64, %cl
+; SSE-NEXT: cmovneq %rax, %rdx
+; SSE-NEXT: cmovneq %rsi, %rax
+; SSE-NEXT: andq 8(%rdi), %rdx
+; SSE-NEXT: andq (%rdi), %rax
+; SSE-NEXT: orq %rdx, %rax
+; SSE-NEXT: setne %al
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: test_ne_i128:
+; AVX2: # %bb.0:
+; AVX2-NEXT: movl %esi, %ecx
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: movl $1, %edx
+; AVX2-NEXT: xorl %esi, %esi
+; AVX2-NEXT: shldq %cl, %rdx, %rsi
+; AVX2-NEXT: shlxq %rcx, %rdx, %rdx
+; AVX2-NEXT: testb $64, %cl
+; AVX2-NEXT: cmovneq %rdx, %rsi
+; AVX2-NEXT: cmovneq %rax, %rdx
+; AVX2-NEXT: andq 8(%rdi), %rsi
+; AVX2-NEXT: andq (%rdi), %rdx
+; AVX2-NEXT: orq %rsi, %rdx
+; AVX2-NEXT: setne %al
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test_ne_i128:
+; AVX512: # %bb.0:
+; AVX512-NEXT: movl %esi, %ecx
+; AVX512-NEXT: movl $1, %eax
+; AVX512-NEXT: xorl %edx, %edx
+; AVX512-NEXT: shldq %cl, %rax, %rdx
+; AVX512-NEXT: xorl %esi, %esi
+; AVX512-NEXT: shlxq %rcx, %rax, %rax
+; AVX512-NEXT: testb $64, %cl
+; AVX512-NEXT: cmovneq %rax, %rdx
+; AVX512-NEXT: cmovneq %rsi, %rax
+; AVX512-NEXT: andq 8(%rdi), %rdx
+; AVX512-NEXT: andq (%rdi), %rax
+; AVX512-NEXT: orq %rdx, %rax
+; AVX512-NEXT: setne %al
+; AVX512-NEXT: retq
+ %rem = and i32 %position, 127
+ %ofs = zext nneg i32 %rem to i128
+ %bit = shl nuw i128 1, %ofs
+ %ld = load i128, ptr %word
+ %test = and i128 %ld, %bit
+ %cmp = icmp ne i128 %test, 0
+ ret i1 %cmp
+}
+
+define i1 @complement_ne_i128(ptr %word, i32 %position) nounwind {
+; X86-LABEL: complement_ne_i128:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $80, %esp
+; X86-NEXT: movzbl 12(%ebp), %ecx
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: shrb $3, %al
+; X86-NEXT: andb $12, %al
+; X86-NEXT: negb %al
+; X86-NEXT: movsbl %al, %eax
+; X86-NEXT: movl 56(%esp,%eax), %esi
+; X86-NEXT: movl 60(%esp,%eax), %edx
+; X86-NEXT: shldl %cl, %esi, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 48(%esp,%eax), %edi
+; X86-NEXT: movl 52(%esp,%eax), %ebx
+; X86-NEXT: shldl %cl, %ebx, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edi, %ebx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: shll %cl, %edi
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: movl 8(%eax), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %esi, %eax
+; X86-NEXT: movl (%ecx), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: andl %edi, %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: movl 12(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: movl 4(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %ebx, %eax
+; X86-NEXT: orl %edx, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT: orl %ecx, %eax
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: movl %esi, 12(%eax)
+; X86-NEXT: movl %edi, (%eax)
+; X86-NEXT: movl %ebx, 4(%eax)
+; X86-NEXT: setne %al
+; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; SSE-LABEL: complement_ne_i128:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %esi, %ecx
+; SSE-NEXT: movl $1, %edx
+; SSE-NEXT: xorl %esi, %esi
+; SSE-NEXT: shldq %cl, %rdx, %rsi
+; SSE-NEXT: shlq %cl, %rdx
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: testb $64, %cl
+; SSE-NEXT: cmovneq %rdx, %rsi
+; SSE-NEXT: cmovneq %rax, %rdx
+; SSE-NEXT: movq (%rdi), %rax
+; SSE-NEXT: movq 8(%rdi), %rcx
+; SSE-NEXT: movq %rcx, %r8
+; SSE-NEXT: andq %rsi, %r8
+; SSE-NEXT: movq %rax, %r9
+; SSE-NEXT: andq %rdx, %r9
+; SSE-NEXT: xorq %rcx, %rsi
+; SSE-NEXT: xorq %rax, %rdx
+; SSE-NEXT: orq %r8, %r9
+; SSE-NEXT: setne %al
+; SSE-NEXT: movq %rdx, (%rdi)
+; SSE-NEXT: movq %rsi, 8(%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: complement_ne_i128:
+; AVX: # %bb.0:
+; AVX-NEXT: movl %esi, %ecx
+; AVX-NEXT: xorl %eax, %eax
+; AVX-NEXT: movl $1, %edx
+; AVX-NEXT: xorl %esi, %esi
+; AVX-NEXT: shldq %cl, %rdx, %rsi
+; AVX-NEXT: shlxq %rcx, %rdx, %rdx
+; AVX-NEXT: testb $64, %cl
+; AVX-NEXT: cmovneq %rdx, %rsi
+; AVX-NEXT: cmovneq %rax, %rdx
+; AVX-NEXT: movq (%rdi), %rax
+; AVX-NEXT: movq 8(%rdi), %rcx
+; AVX-NEXT: movq %rcx, %r8
+; AVX-NEXT: andq %rsi, %r8
+; AVX-NEXT: movq %rax, %r9
+; AVX-NEXT: andq %rdx, %r9
+; AVX-NEXT: xorq %rcx, %rsi
+; AVX-NEXT: xorq %rax, %rdx
+; AVX-NEXT: orq %r8, %r9
+; AVX-NEXT: setne %al
+; AVX-NEXT: movq %rdx, (%rdi)
+; AVX-NEXT: movq %rsi, 8(%rdi)
+; AVX-NEXT: retq
+ %rem = and i32 %position, 127
+ %ofs = zext nneg i32 %rem to i128
+ %bit = shl nuw i128 1, %ofs
+ %ld = load i128, ptr %word
+ %test = and i128 %ld, %bit
+ %res = xor i128 %ld, %bit
+ %cmp = icmp ne i128 %test, 0
+ store i128 %res, ptr %word
+ ret i1 %cmp
+}
+
+define i1 @reset_eq_i128(ptr %word, i32 %position) nounwind {
+; X86-LABEL: reset_eq_i128:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $80, %esp
+; X86-NEXT: movzbl 12(%ebp), %ecx
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: shrb $3, %al
+; X86-NEXT: andb $12, %al
+; X86-NEXT: negb %al
+; X86-NEXT: movsbl %al, %eax
+; X86-NEXT: movl 56(%esp,%eax), %edx
+; X86-NEXT: movl 60(%esp,%eax), %esi
+; X86-NEXT: shldl %cl, %edx, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 48(%esp,%eax), %esi
+; X86-NEXT: movl 52(%esp,%eax), %edi
+; X86-NEXT: shldl %cl, %edi, %edx
+; X86-NEXT: shldl %cl, %esi, %edi
+; X86-NEXT: movl 8(%ebp), %ebx
+; X86-NEXT: shll %cl, %esi
+; X86-NEXT: movl 8(%ebx), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %edx, %eax
+; X86-NEXT: movl (%ebx), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %esi, %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 12(%ebx), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl %edi, %ecx
+; X86-NEXT: movl 4(%ebx), %ebx
+; X86-NEXT: andl %ebx, %edi
+; X86-NEXT: orl %eax, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: notl %eax
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: notl %edx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: notl %ecx
+; X86-NEXT: andl %ebx, %ecx
+; X86-NEXT: notl %esi
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT: movl 8(%ebp), %edi
+; X86-NEXT: movl %edx, 8(%edi)
+; X86-NEXT: movl %eax, 12(%edi)
+; X86-NEXT: movl %esi, (%edi)
+; X86-NEXT: movl %ecx, 4(%edi)
+; X86-NEXT: sete %al
+; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; SSE-LABEL: reset_eq_i128:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %esi, %ecx
+; SSE-NEXT: movl $1, %edx
+; SSE-NEXT: xorl %esi, %esi
+; SSE-NEXT: shldq %cl, %rdx, %rsi
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: shlq %cl, %rdx
+; SSE-NEXT: testb $64, %cl
+; SSE-NEXT: cmovneq %rdx, %rsi
+; SSE-NEXT: cmovneq %rax, %rdx
+; SSE-NEXT: movq (%rdi), %rax
+; SSE-NEXT: movq 8(%rdi), %rcx
+; SSE-NEXT: movq %rcx, %r8
+; SSE-NEXT: andq %rsi, %r8
+; SSE-NEXT: notq %rsi
+; SSE-NEXT: movq %rax, %r9
+; SSE-NEXT: andq %rdx, %r9
+; SSE-NEXT: notq %rdx
+; SSE-NEXT: andq %rcx, %rsi
+; SSE-NEXT: andq %rax, %rdx
+; SSE-NEXT: orq %r8, %r9
+; SSE-NEXT: sete %al
+; SSE-NEXT: movq %rdx, (%rdi)
+; SSE-NEXT: movq %rsi, 8(%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: reset_eq_i128:
+; AVX: # %bb.0:
+; AVX-NEXT: movl %esi, %ecx
+; AVX-NEXT: xorl %eax, %eax
+; AVX-NEXT: movl $1, %edx
+; AVX-NEXT: xorl %esi, %esi
+; AVX-NEXT: shldq %cl, %rdx, %rsi
+; AVX-NEXT: shlxq %rcx, %rdx, %rdx
+; AVX-NEXT: testb $64, %cl
+; AVX-NEXT: cmovneq %rdx, %rsi
+; AVX-NEXT: cmovneq %rax, %rdx
+; AVX-NEXT: movq (%rdi), %rax
+; AVX-NEXT: movq 8(%rdi), %rcx
+; AVX-NEXT: andnq %rcx, %rsi, %r8
+; AVX-NEXT: andq %rsi, %rcx
+; AVX-NEXT: andnq %rax, %rdx, %rsi
+; AVX-NEXT: andq %rdx, %rax
+; AVX-NEXT: orq %rcx, %rax
+; AVX-NEXT: sete %al
+; AVX-NEXT: movq %rsi, (%rdi)
+; AVX-NEXT: movq %r8, 8(%rdi)
+; AVX-NEXT: retq
+ %rem = and i32 %position, 127
+ %ofs = zext nneg i32 %rem to i128
+ %bit = shl nuw i128 1, %ofs
+ %mask = xor i128 %bit, -1
+ %ld = load i128, ptr %word
+ %test = and i128 %ld, %bit
+ %res = and i128 %ld, %mask
+ %cmp = icmp eq i128 %test, 0
+ store i128 %res, ptr %word
+ ret i1 %cmp
+}
+
+define i1 @set_ne_i128(ptr %word, i32 %position) nounwind {
+; X86-LABEL: set_ne_i128:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $80, %esp
+; X86-NEXT: movzbl 12(%ebp), %ecx
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: shrb $3, %al
+; X86-NEXT: andb $12, %al
+; X86-NEXT: negb %al
+; X86-NEXT: movsbl %al, %eax
+; X86-NEXT: movl 56(%esp,%eax), %esi
+; X86-NEXT: movl 60(%esp,%eax), %edx
+; X86-NEXT: shldl %cl, %esi, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 48(%esp,%eax), %edi
+; X86-NEXT: movl 52(%esp,%eax), %ebx
+; X86-NEXT: shldl %cl, %ebx, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edi, %ebx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: shll %cl, %edi
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: movl 8(%eax), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %esi, %eax
+; X86-NEXT: movl (%ecx), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: andl %edi, %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: movl 12(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: movl 4(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %ebx, %eax
+; X86-NEXT: orl %edx, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT: orl %ecx, %eax
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: movl %esi, 12(%eax)
+; X86-NEXT: movl %edi, (%eax)
+; X86-NEXT: movl %ebx, 4(%eax)
+; X86-NEXT: setne %al
+; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; SSE-LABEL: set_ne_i128:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %esi, %ecx
+; SSE-NEXT: movl $1, %edx
+; SSE-NEXT: xorl %esi, %esi
+; SSE-NEXT: shldq %cl, %rdx, %rsi
+; SSE-NEXT: shlq %cl, %rdx
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: testb $64, %cl
+; SSE-NEXT: cmovneq %rdx, %rsi
+; SSE-NEXT: cmovneq %rax, %rdx
+; SSE-NEXT: movq (%rdi), %rax
+; SSE-NEXT: movq 8(%rdi), %rcx
+; SSE-NEXT: movq %rcx, %r8
+; SSE-NEXT: andq %rsi, %r8
+; SSE-NEXT: movq %rax, %r9
+; SSE-NEXT: andq %rdx, %r9
+; SSE-NEXT: orq %rcx, %rsi
+; SSE-NEXT: orq %rax, %rdx
+; SSE-NEXT: orq %r8, %r9
+; SSE-NEXT: setne %al
+; SSE-NEXT: movq %rdx, (%rdi)
+; SSE-NEXT: movq %rsi, 8(%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: set_ne_i128:
+; AVX: # %bb.0:
+; AVX-NEXT: movl %esi, %ecx
+; AVX-NEXT: xorl %eax, %eax
+; AVX-NEXT: movl $1, %edx
+; AVX-NEXT: xorl %esi, %esi
+; AVX-NEXT: shldq %cl, %rdx, %rsi
+; AVX-NEXT: shlxq %rcx, %rdx, %rdx
+; AVX-NEXT: testb $64, %cl
+; AVX-NEXT: cmovneq %rdx, %rsi
+; AVX-NEXT: cmovneq %rax, %rdx
+; AVX-NEXT: movq (%rdi), %rax
+; AVX-NEXT: movq 8(%rdi), %rcx
+; AVX-NEXT: movq %rcx, %r8
+; AVX-NEXT: andq %rsi, %r8
+; AVX-NEXT: movq %rax, %r9
+; AVX-NEXT: andq %rdx, %r9
+; AVX-NEXT: orq %rcx, %rsi
+; AVX-NEXT: orq %rax, %rdx
+; AVX-NEXT: orq %r8, %r9
+; AVX-NEXT: setne %al
+; AVX-NEXT: movq %rdx, (%rdi)
+; AVX-NEXT: movq %rsi, 8(%rdi)
+; AVX-NEXT: retq
+ %rem = and i32 %position, 127
+ %ofs = zext nneg i32 %rem to i128
+ %bit = shl nuw i128 1, %ofs
+ %ld = load i128, ptr %word
+ %test = and i128 %ld, %bit
+ %res = or i128 %ld, %bit
+ %cmp = icmp ne i128 %test, 0
+ store i128 %res, ptr %word
+ ret i1 %cmp
+}
+
+define i1 @init_eq_i128(ptr %word, i32 %position, i1 zeroext %value) nounwind {
+; X86-LABEL: init_eq_i128:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $128, %esp
+; X86-NEXT: movzbl 12(%ebp), %ecx
+; X86-NEXT: movzbl 16(%ebp), %eax
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: shrb $3, %dl
+; X86-NEXT: andb $12, %dl
+; X86-NEXT: negb %dl
+; X86-NEXT: movsbl %dl, %esi
+; X86-NEXT: movl 64(%esp,%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 68(%esp,%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 72(%esp,%esi), %ebx
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movzbl %al, %eax
+; X86-NEXT: movl 76(%esp,%esi), %edi
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: shldl %cl, %ebx, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: shll %cl, %edx
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 8(%ebp), %esi
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 8(%esi), %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %ebx, %eax
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl (%esi), %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %ebx, %edx
+; X86-NEXT: orl %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %edi, %esi
+; X86-NEXT: movl 8(%ebp), %ecx
+; X86-NEXT: movl 12(%ecx), %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %ebx, %edi
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: movl 4(%ecx), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %ecx, %ebx
+; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: notl %ecx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl 100(%esp,%ecx), %edi
+; X86-NEXT: movl 104(%esp,%ecx), %ecx
+; X86-NEXT: movl %ecx, %ebx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: movzbl 12(%ebp), %ecx
+; X86-NEXT: shldl %cl, %edi, %ebx
+; X86-NEXT: orl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: notl %esi
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: movl 108(%esp,%ebx), %ebx
+; X86-NEXT: shldl %cl, %eax, %ebx
+; X86-NEXT: orl %ebx, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: notl %eax
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: movl 96(%esp,%ebx), %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shll %cl, %ebx
+; X86-NEXT: orl %ebx, %eax
+; X86-NEXT: notl %edx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: shldl %cl, %ebx, %edi
+; X86-NEXT: orl %edi, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: movl 8(%ebp), %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: movl %edi, 8(%ecx)
+; X86-NEXT: movl %esi, 12(%ecx)
+; X86-NEXT: movl %eax, (%ecx)
+; X86-NEXT: movl %edx, 4(%ecx)
+; X86-NEXT: sete %al
+; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; SSE-LABEL: init_eq_i128:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %esi, %ecx
+; SSE-NEXT: movl $1, %esi
+; SSE-NEXT: xorl %r8d, %r8d
+; SSE-NEXT: shldq %cl, %rsi, %r8
+; SSE-NEXT: shlq %cl, %rsi
+; SSE-NEXT: movl %edx, %eax
+; SSE-NEXT: xorl %edx, %edx
+; SSE-NEXT: shldq %cl, %rax, %rdx
+; SSE-NEXT: shlq %cl, %rax
+; SSE-NEXT: xorl %r9d, %r9d
+; SSE-NEXT: testb $64, %cl
+; SSE-NEXT: cmovneq %rsi, %r8
+; SSE-NEXT: cmovneq %r9, %rsi
+; SSE-NEXT: cmovneq %rax, %rdx
+; SSE-NEXT: cmovneq %r9, %rax
+; SSE-NEXT: movq (%rdi), %rcx
+; SSE-NEXT: movq 8(%rdi), %r9
+; SSE-NEXT: movq %r9, %r10
+; SSE-NEXT: andq %r8, %r10
+; SSE-NEXT: notq %r8
+; SSE-NEXT: movq %rcx, %r11
+; SSE-NEXT: andq %rsi, %r11
+; SSE-NEXT: notq %rsi
+; SSE-NEXT: andq %r9, %r8
+; SSE-NEXT: orq %rdx, %r8
+; SSE-NEXT: andq %rcx, %rsi
+; SSE-NEXT: orq %rax, %rsi
+; SSE-NEXT: orq %r10, %r11
+; SSE-NEXT: sete %al
+; SSE-NEXT: movq %rsi, (%rdi)
+; SSE-NEXT: movq %r8, 8(%rdi)
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: init_eq_i128:
+; AVX2: # %bb.0:
+; AVX2-NEXT: movl %esi, %ecx
+; AVX2-NEXT: movl $1, %esi
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: shldq %cl, %rsi, %rax
+; AVX2-NEXT: xorl %r8d, %r8d
+; AVX2-NEXT: movl %edx, %edx
+; AVX2-NEXT: xorl %r9d, %r9d
+; AVX2-NEXT: shldq %cl, %rdx, %r9
+; AVX2-NEXT: shlxq %rcx, %rsi, %rsi
+; AVX2-NEXT: testb $64, %cl
+; AVX2-NEXT: cmovneq %rsi, %rax
+; AVX2-NEXT: cmovneq %r8, %rsi
+; AVX2-NEXT: shlxq %rcx, %rdx, %rcx
+; AVX2-NEXT: cmovneq %rcx, %r9
+; AVX2-NEXT: cmovneq %r8, %rcx
+; AVX2-NEXT: movq (%rdi), %rdx
+; AVX2-NEXT: movq 8(%rdi), %r8
+; AVX2-NEXT: andnq %r8, %rax, %r10
+; AVX2-NEXT: andq %rax, %r8
+; AVX2-NEXT: andnq %rdx, %rsi, %r11
+; AVX2-NEXT: andq %rsi, %rdx
+; AVX2-NEXT: orq %r9, %r10
+; AVX2-NEXT: orq %rcx, %r11
+; AVX2-NEXT: orq %r8, %rdx
+; AVX2-NEXT: sete %al
+; AVX2-NEXT: movq %r11, (%rdi)
+; AVX2-NEXT: movq %r10, 8(%rdi)
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: init_eq_i128:
+; AVX512: # %bb.0:
+; AVX512-NEXT: movl %esi, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: movl $1, %esi
+; AVX512-NEXT: xorl %r8d, %r8d
+; AVX512-NEXT: shldq %cl, %rsi, %r8
+; AVX512-NEXT: shlxq %rcx, %rsi, %rsi
+; AVX512-NEXT: movl %edx, %edx
+; AVX512-NEXT: xorl %r9d, %r9d
+; AVX512-NEXT: shldq %cl, %rdx, %r9
+; AVX512-NEXT: testb $64, %cl
+; AVX512-NEXT: cmovneq %rsi, %r8
+; AVX512-NEXT: cmovneq %rax, %rsi
+; AVX512-NEXT: shlxq %rcx, %rdx, %rcx
+; AVX512-NEXT: cmovneq %rcx, %r9
+; AVX512-NEXT: cmovneq %rax, %rcx
+; AVX512-NEXT: movq (%rdi), %rax
+; AVX512-NEXT: movq 8(%rdi), %rdx
+; AVX512-NEXT: andnq %rdx, %r8, %r10
+; AVX512-NEXT: andq %r8, %rdx
+; AVX512-NEXT: andnq %rax, %rsi, %r8
+; AVX512-NEXT: andq %rsi, %rax
+; AVX512-NEXT: orq %r9, %r10
+; AVX512-NEXT: orq %rcx, %r8
+; AVX512-NEXT: orq %rdx, %rax
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: movq %r8, (%rdi)
+; AVX512-NEXT: movq %r10, 8(%rdi)
+; AVX512-NEXT: retq
+ %rem = and i32 %position, 127
+ %ofs = zext nneg i32 %rem to i128
+ %bit = shl nuw i128 1, %ofs
+ %mask = xor i128 %bit, -1
+ %val0 = zext i1 %value to i128
+ %val = shl nuw i128 %val0, %ofs
+ %ld = load i128, ptr %word
+ %test = and i128 %ld, %bit
+ %res0 = and i128 %ld, %mask
+ %res = or i128 %res0, %val
+ %cmp = icmp eq i128 %test, 0
+ store i128 %res, ptr %word
+ ret i1 %cmp
+}
+
+; i512
+
+define i1 @test_ne_i512(ptr %word, i32 %position) nounwind {
+; X86-LABEL: test_ne_i512:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $224, %esp
+; X86-NEXT: movl 12(%ebp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: shrl $3, %eax
+; X86-NEXT: andl $60, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: leal {{[0-9]+}}(%esp), %edx
+; X86-NEXT: subl %eax, %edx
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 24(%edx), %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 28(%edx), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl $31, %ecx
+; X86-NEXT: shldl %cl, %esi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 56(%edx), %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 60(%edx), %eax
+; X86-NEXT: shldl %cl, %esi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 8(%edx), %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 12(%edx), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %esi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 40(%edx), %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 44(%edx), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 16(%edx), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 20(%edx), %edi
+; X86-NEXT: movl %edi, %ebx
+; X86-NEXT: shldl %cl, %eax, %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 32(%edx), %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 36(%edx), %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %esi, %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl 52(%edx), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl 4(%edx), %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: shldl %cl, %edi, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: shldl %cl, %edi, %eax
+; X86-NEXT: movl 8(%ebp), %ebx
+; X86-NEXT: andl 40(%ebx), %eax
+; X86-NEXT: andl 8(%ebx), %esi
+; X86-NEXT: orl %eax, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 56(%ebx), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: andl 24(%ebx), %edi
+; X86-NEXT: orl %eax, %edi
+; X86-NEXT: orl %esi, %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %ebx, %edi
+; X86-NEXT: andl 44(%ebx), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: andl 12(%ebx), %esi
+; X86-NEXT: orl %eax, %esi
+; X86-NEXT: movl %esi, %ebx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: andl 60(%edi), %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 28(%edi), %eax
+; X86-NEXT: orl %esi, %eax
+; X86-NEXT: orl %ebx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl (%edx), %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: shldl %cl, %edx, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: shldl %cl, %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: negl %edx
+; X86-NEXT: movl 192(%esp,%edx), %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: shldl %cl, %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: shldl %cl, %ebx, %edx
+; X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NEXT: shll %cl, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl 8(%ebp), %ebx
+; X86-NEXT: andl 32(%ebx), %ecx
+; X86-NEXT: andl (%ebx), %esi
+; X86-NEXT: orl %ecx, %esi
+; X86-NEXT: andl 16(%ebx), %edi
+; X86-NEXT: andl 48(%ebx), %edx
+; X86-NEXT: orl %edi, %edx
+; X86-NEXT: orl %esi, %edx
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: andl 36(%ebx), %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: andl 4(%ebx), %esi
+; X86-NEXT: orl %ecx, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: andl 20(%ebx), %ecx
+; X86-NEXT: andl 52(%ebx), %eax
+; X86-NEXT: orl %ecx, %eax
+; X86-NEXT: orl %esi, %eax
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: orl %edx, %eax
+; X86-NEXT: setne %al
+; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; SSE-LABEL: test_ne_i512:
+; SSE: # %bb.0:
+; SSE-NEXT: pushq %r15
+; SSE-NEXT: pushq %r14
+; SSE-NEXT: pushq %rbx
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movq $0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movq $1, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movl %esi, %ecx
+; SSE-NEXT: andl $63, %ecx
+; SSE-NEXT: shrl $3, %esi
+; SSE-NEXT: andl $56, %esi
+; SSE-NEXT: negl %esi
+; SSE-NEXT: movslq %esi, %rbx
+; SSE-NEXT: movq -48(%rsp,%rbx), %rdx
+; SSE-NEXT: movq -40(%rsp,%rbx), %r14
+; SSE-NEXT: movq %r14, %rax
+; SSE-NEXT: shldq %cl, %rdx, %rax
+; SSE-NEXT: movq -16(%rsp,%rbx), %r11
+; SSE-NEXT: movq -8(%rsp,%rbx), %r10
+; SSE-NEXT: shldq %cl, %r11, %r10
+; SSE-NEXT: movq -32(%rsp,%rbx), %r9
+; SSE-NEXT: movq -24(%rsp,%rbx), %r15
+; SSE-NEXT: movq %r15, %r8
+; SSE-NEXT: shldq %cl, %r9, %r8
+; SSE-NEXT: movq -56(%rsp,%rbx), %rsi
+; SSE-NEXT: shldq %cl, %rsi, %rdx
+; SSE-NEXT: shldq %cl, %r15, %r11
+; SSE-NEXT: shldq %cl, %r14, %r9
+; SSE-NEXT: movq -64(%rsp,%rbx), %rbx
+; SSE-NEXT: shldq %cl, %rbx, %rsi
+; SSE-NEXT: # kill: def $cl killed $cl killed $ecx
+; SSE-NEXT: shlq %cl, %rbx
+; SSE-NEXT: andq 32(%rdi), %r9
+; SSE-NEXT: andq 48(%rdi), %r11
+; SSE-NEXT: andq 16(%rdi), %rdx
+; SSE-NEXT: orq %r11, %rdx
+; SSE-NEXT: andq 40(%rdi), %r8
+; SSE-NEXT: andq 56(%rdi), %r10
+; SSE-NEXT: andq 24(%rdi), %rax
+; SSE-NEXT: orq %r10, %rax
+; SSE-NEXT: andq (%rdi), %rbx
+; SSE-NEXT: orq %r9, %rbx
+; SSE-NEXT: orq %rdx, %rbx
+; SSE-NEXT: andq 8(%rdi), %rsi
+; SSE-NEXT: orq %r8, %rsi
+; SSE-NEXT: orq %rax, %rsi
+; SSE-NEXT: orq %rbx, %rsi
+; SSE-NEXT: setne %al
+; SSE-NEXT: popq %rbx
+; SSE-NEXT: popq %r14
+; SSE-NEXT: popq %r15
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: test_ne_i512:
+; AVX2: # %bb.0:
+; AVX2-NEXT: pushq %r15
+; AVX2-NEXT: pushq %r14
+; AVX2-NEXT: pushq %rbx
+; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovss {{.*#+}} xmm0 = [1,0,0,0]
+; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: movl %esi, %ecx
+; AVX2-NEXT: andl $63, %ecx
+; AVX2-NEXT: shrl $3, %esi
+; AVX2-NEXT: andl $56, %esi
+; AVX2-NEXT: negl %esi
+; AVX2-NEXT: movslq %esi, %rsi
+; AVX2-NEXT: movq -48(%rsp,%rsi), %rdx
+; AVX2-NEXT: movq -40(%rsp,%rsi), %rbx
+; AVX2-NEXT: movq %rbx, %rax
+; AVX2-NEXT: shldq %cl, %rdx, %rax
+; AVX2-NEXT: movq -16(%rsp,%rsi), %r11
+; AVX2-NEXT: movq -8(%rsp,%rsi), %r10
+; AVX2-NEXT: shldq %cl, %r11, %r10
+; AVX2-NEXT: movq -32(%rsp,%rsi), %r9
+; AVX2-NEXT: movq -24(%rsp,%rsi), %r14
+; AVX2-NEXT: movq %r14, %r8
+; AVX2-NEXT: shldq %cl, %r9, %r8
+; AVX2-NEXT: movq -64(%rsp,%rsi), %r15
+; AVX2-NEXT: movq -56(%rsp,%rsi), %rsi
+; AVX2-NEXT: shldq %cl, %rsi, %rdx
+; AVX2-NEXT: shldq %cl, %r14, %r11
+; AVX2-NEXT: shldq %cl, %rbx, %r9
+; AVX2-NEXT: shldq %cl, %r15, %rsi
+; AVX2-NEXT: shlxq %rcx, %r15, %rcx
+; AVX2-NEXT: andq 32(%rdi), %r9
+; AVX2-NEXT: andq 48(%rdi), %r11
+; AVX2-NEXT: andq 16(%rdi), %rdx
+; AVX2-NEXT: andq 40(%rdi), %r8
+; AVX2-NEXT: andq 56(%rdi), %r10
+; AVX2-NEXT: andq 24(%rdi), %rax
+; AVX2-NEXT: orq %r11, %rdx
+; AVX2-NEXT: orq %r10, %rax
+; AVX2-NEXT: andq (%rdi), %rcx
+; AVX2-NEXT: orq %r9, %rcx
+; AVX2-NEXT: orq %rdx, %rcx
+; AVX2-NEXT: andq 8(%rdi), %rsi
+; AVX2-NEXT: orq %r8, %rsi
+; AVX2-NEXT: orq %rax, %rsi
+; AVX2-NEXT: orq %rcx, %rsi
+; AVX2-NEXT: setne %al
+; AVX2-NEXT: popq %rbx
+; AVX2-NEXT: popq %r14
+; AVX2-NEXT: popq %r15
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test_ne_i512:
+; AVX512: # %bb.0:
+; AVX512-NEXT: pushq %r15
+; AVX512-NEXT: pushq %r14
+; AVX512-NEXT: pushq %rbx
+; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [1,0,0,0]
+; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: movl %esi, %ecx
+; AVX512-NEXT: andl $63, %ecx
+; AVX512-NEXT: shrl $3, %esi
+; AVX512-NEXT: andl $56, %esi
+; AVX512-NEXT: negl %esi
+; AVX512-NEXT: movslq %esi, %rbx
+; AVX512-NEXT: movq -48(%rsp,%rbx), %rdx
+; AVX512-NEXT: movq -40(%rsp,%rbx), %r14
+; AVX512-NEXT: movq %r14, %rax
+; AVX512-NEXT: shldq %cl, %rdx, %rax
+; AVX512-NEXT: movq -16(%rsp,%rbx), %r11
+; AVX512-NEXT: movq -8(%rsp,%rbx), %r10
+; AVX512-NEXT: shldq %cl, %r11, %r10
+; AVX512-NEXT: movq -32(%rsp,%rbx), %r9
+; AVX512-NEXT: movq -24(%rsp,%rbx), %r15
+; AVX512-NEXT: movq %r15, %r8
+; AVX512-NEXT: shldq %cl, %r9, %r8
+; AVX512-NEXT: movq -56(%rsp,%rbx), %rsi
+; AVX512-NEXT: shldq %cl, %rsi, %rdx
+; AVX512-NEXT: shldq %cl, %r15, %r11
+; AVX512-NEXT: shldq %cl, %r14, %r9
+; AVX512-NEXT: movq -64(%rsp,%rbx), %rbx
+; AVX512-NEXT: shldq %cl, %rbx, %rsi
+; AVX512-NEXT: shlxq %rcx, %rbx, %rcx
+; AVX512-NEXT: andq 32(%rdi), %r9
+; AVX512-NEXT: andq 48(%rdi), %r11
+; AVX512-NEXT: andq 16(%rdi), %rdx
+; AVX512-NEXT: andq 40(%rdi), %r8
+; AVX512-NEXT: andq 56(%rdi), %r10
+; AVX512-NEXT: andq 24(%rdi), %rax
+; AVX512-NEXT: orq %r11, %rdx
+; AVX512-NEXT: orq %r10, %rax
+; AVX512-NEXT: andq (%rdi), %rcx
+; AVX512-NEXT: orq %r9, %rcx
+; AVX512-NEXT: orq %rdx, %rcx
+; AVX512-NEXT: andq 8(%rdi), %rsi
+; AVX512-NEXT: orq %r8, %rsi
+; AVX512-NEXT: orq %rax, %rsi
+; AVX512-NEXT: orq %rcx, %rsi
+; AVX512-NEXT: setne %al
+; AVX512-NEXT: popq %rbx
+; AVX512-NEXT: popq %r14
+; AVX512-NEXT: popq %r15
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+ %rem = and i32 %position, 511
+ %ofs = zext nneg i32 %rem to i512
+ %bit = shl nuw i512 1, %ofs
+ %ld = load i512, ptr %word
+ %test = and i512 %ld, %bit
+ %cmp = icmp ne i512 %test, 0
+ ret i1 %cmp
+}
+
+define i1 @complement_ne_i512(ptr %word, i32 %position) nounwind {
+; X86-LABEL: complement_ne_i512:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $272, %esp # imm = 0x110
+; X86-NEXT: movl 12(%ebp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: shrl $3, %eax
+; X86-NEXT: andl $60, %eax
+; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT: leal {{[0-9]+}}(%esp), %edx
+; X86-NEXT: subl %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 24(%edx), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 28(%edx), %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl $31, %ecx
+; X86-NEXT: shldl %cl, %eax, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 56(%edx), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 60(%edx), %esi
+; X86-NEXT: shldl %cl, %eax, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 8(%edx), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 12(%edx), %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %eax, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 40(%edx), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 44(%edx), %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %eax, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 16(%edx), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 20(%edx), %ebx
+; X86-NEXT: movl %ebx, %esi
+; X86-NEXT: shldl %cl, %eax, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 32(%edx), %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 36(%edx), %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edi, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl 52(%edx), %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: shldl %cl, %esi, %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 4(%edx), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 8(%ebp), %edx
+; X86-NEXT: movl 40(%edx), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %esi, %eax
+; X86-NEXT: movl 8(%edx), %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %ebx, %esi
+; X86-NEXT: orl %eax, %esi
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: movl 56(%edx), %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %edi, %ebx
+; X86-NEXT: movl 24(%edx), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: orl %ebx, %edx
+; X86-NEXT: orl %esi, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 44(%eax), %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: movl 12(%eax), %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT: orl %esi, %ebx
+; X86-NEXT: movl 60(%eax), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl 28(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: orl %eax, %edx
+; X86-NEXT: orl %ebx, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl (%eax), %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl (%esp), %eax # 4-byte Reload
+; X86-NEXT: negl %eax
+; X86-NEXT: movl 240(%esp,%eax), %esi
+; X86-NEXT: shldl %cl, %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: shldl %cl, %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, %esi
+; X86-NEXT: movl %esi, (%esp) # 4-byte Spill
+; X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NEXT: shll %cl, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 8(%ebp), %esi
+; X86-NEXT: movl 32(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %edi, %eax
+; X86-NEXT: movl (%esi), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %edx, %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: movl 16(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %ebx, %eax
+; X86-NEXT: movl 48(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl (%esp), %edx # 4-byte Folded Reload
+; X86-NEXT: orl %eax, %edx
+; X86-NEXT: orl %ecx, %edx
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: movl 36(%esi), %ebx
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl 4(%esi), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl 20(%esi), %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: andl %esi, %edi
+; X86-NEXT: movl 52(%eax), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: orl %edi, %eax
+; X86-NEXT: orl %ecx, %eax
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: xorl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: xorl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: xorl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: xorl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: xorl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: xorl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: xorl %ecx, (%esp) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: xorl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: xorl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: orl %edx, %eax
+; X86-NEXT: movl 8(%ebp), %edx
+; X86-NEXT: movl %ebx, 60(%edx)
+; X86-NEXT: movl %edi, 56(%edx)
+; X86-NEXT: movl %ecx, 52(%edx)
+; X86-NEXT: movl %esi, 44(%edx)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, 40(%edx)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, 36(%edx)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, 32(%edx)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, 28(%edx)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, 24(%edx)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, 20(%edx)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, 16(%edx)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, 12(%edx)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, 8(%edx)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, 4(%edx)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, (%edx)
+; X86-NEXT: movl (%esp), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, 48(%edx)
+; X86-NEXT: setne %al
+; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; SSE-LABEL: complement_ne_i512:
+; SSE: # %bb.0:
+; SSE-NEXT: pushq %rbp
+; SSE-NEXT: pushq %r15
+; SSE-NEXT: pushq %r14
+; SSE-NEXT: pushq %r13
+; SSE-NEXT: pushq %r12
+; SSE-NEXT: pushq %rbx
+; SSE-NEXT: subq $56, %rsp
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movq $0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movq $1, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movl %esi, %ecx
+; SSE-NEXT: andl $63, %ecx
+; SSE-NEXT: shrl $3, %esi
+; SSE-NEXT: andl $56, %esi
+; SSE-NEXT: negl %esi
+; SSE-NEXT: movslq %esi, %rbx
+; SSE-NEXT: movq (%rsp,%rbx), %rsi
+; SSE-NEXT: movq 8(%rsp,%rbx), %r14
+; SSE-NEXT: movq %r14, %rax
+; SSE-NEXT: shldq %cl, %rsi, %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 32(%rsp,%rbx), %r8
+; SSE-NEXT: movq 40(%rsp,%rbx), %rbp
+; SSE-NEXT: shldq %cl, %r8, %rbp
+; SSE-NEXT: movq 16(%rsp,%rbx), %r9
+; SSE-NEXT: movq 24(%rsp,%rbx), %r15
+; SSE-NEXT: movq %r15, %r10
+; SSE-NEXT: shldq %cl, %r9, %r10
+; SSE-NEXT: movq -8(%rsp,%rbx), %r11
+; SSE-NEXT: shldq %cl, %r11, %rsi
+; SSE-NEXT: shldq %cl, %r15, %r8
+; SSE-NEXT: shldq %cl, %r14, %r9
+; SSE-NEXT: movq -16(%rsp,%rbx), %rbx
+; SSE-NEXT: shldq %cl, %rbx, %r11
+; SSE-NEXT: # kill: def $cl killed $cl killed $ecx
+; SSE-NEXT: shlq %cl, %rbx
+; SSE-NEXT: movq 24(%rdi), %r15
+; SSE-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 56(%rdi), %rcx
+; SSE-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 16(%rdi), %r12
+; SSE-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 48(%rdi), %r13
+; SSE-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: andq %r8, %r13
+; SSE-NEXT: andq %rsi, %r12
+; SSE-NEXT: orq %r13, %r12
+; SSE-NEXT: movq %rcx, %r13
+; SSE-NEXT: andq %rbp, %r13
+; SSE-NEXT: andq %rax, %r15
+; SSE-NEXT: orq %r13, %r15
+; SSE-NEXT: movq 32(%rdi), %r14
+; SSE-NEXT: movq %r14, %rcx
+; SSE-NEXT: andq %r9, %rcx
+; SSE-NEXT: movq (%rdi), %r13
+; SSE-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: andq %rbx, %r13
+; SSE-NEXT: orq %rcx, %r13
+; SSE-NEXT: orq %r12, %r13
+; SSE-NEXT: movq 40(%rdi), %rcx
+; SSE-NEXT: movq %rcx, %r12
+; SSE-NEXT: andq %r10, %r12
+; SSE-NEXT: movq 8(%rdi), %rdx
+; SSE-NEXT: movq %rdx, %rax
+; SSE-NEXT: andq %r11, %rax
+; SSE-NEXT: orq %r12, %rax
+; SSE-NEXT: orq %r15, %rax
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; SSE-NEXT: xorq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
+; SSE-NEXT: xorq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; SSE-NEXT: xorq %rcx, %r10
+; SSE-NEXT: xorq %r14, %r9
+; SSE-NEXT: xorq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
+; SSE-NEXT: xorq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
+; SSE-NEXT: xorq %rdx, %r11
+; SSE-NEXT: xorq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
+; SSE-NEXT: orq %r13, %rax
+; SSE-NEXT: movq %r8, 48(%rdi)
+; SSE-NEXT: movq %rbp, 56(%rdi)
+; SSE-NEXT: movq %r9, 32(%rdi)
+; SSE-NEXT: movq %r10, 40(%rdi)
+; SSE-NEXT: movq %rsi, 16(%rdi)
+; SSE-NEXT: movq %r15, 24(%rdi)
+; SSE-NEXT: movq %rbx, (%rdi)
+; SSE-NEXT: movq %r11, 8(%rdi)
+; SSE-NEXT: setne %al
+; SSE-NEXT: addq $56, %rsp
+; SSE-NEXT: popq %rbx
+; SSE-NEXT: popq %r12
+; SSE-NEXT: popq %r13
+; SSE-NEXT: popq %r14
+; SSE-NEXT: popq %r15
+; SSE-NEXT: popq %rbp
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: complement_ne_i512:
+; AVX2: # %bb.0:
+; AVX2-NEXT: pushq %rbp
+; AVX2-NEXT: pushq %r15
+; AVX2-NEXT: pushq %r14
+; AVX2-NEXT: pushq %r13
+; AVX2-NEXT: pushq %r12
+; AVX2-NEXT: pushq %rbx
+; AVX2-NEXT: subq $72, %rsp
+; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovss {{.*#+}} xmm0 = [1,0,0,0]
+; AVX2-NEXT: vmovups %ymm0, (%rsp)
+; AVX2-NEXT: movl %esi, %ecx
+; AVX2-NEXT: andl $63, %ecx
+; AVX2-NEXT: shrl $3, %esi
+; AVX2-NEXT: andl $56, %esi
+; AVX2-NEXT: negl %esi
+; AVX2-NEXT: movslq %esi, %rbx
+; AVX2-NEXT: movq 16(%rsp,%rbx), %rsi
+; AVX2-NEXT: movq 24(%rsp,%rbx), %rbp
+; AVX2-NEXT: movq %rbp, %rax
+; AVX2-NEXT: shldq %cl, %rsi, %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 48(%rsp,%rbx), %r8
+; AVX2-NEXT: movq 56(%rsp,%rbx), %r13
+; AVX2-NEXT: shldq %cl, %r8, %r13
+; AVX2-NEXT: movq 32(%rsp,%rbx), %r9
+; AVX2-NEXT: movq 40(%rsp,%rbx), %r14
+; AVX2-NEXT: movq %r14, %r10
+; AVX2-NEXT: shldq %cl, %r9, %r10
+; AVX2-NEXT: movq 8(%rsp,%rbx), %r11
+; AVX2-NEXT: shldq %cl, %r11, %rsi
+; AVX2-NEXT: shldq %cl, %r14, %r8
+; AVX2-NEXT: movq 16(%rdi), %r12
+; AVX2-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 48(%rdi), %r14
+; AVX2-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: andq %r8, %r14
+; AVX2-NEXT: andq %rsi, %r12
+; AVX2-NEXT: orq %r14, %r12
+; AVX2-NEXT: movq 56(%rdi), %r15
+; AVX2-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: andq %r13, %r15
+; AVX2-NEXT: movq 24(%rdi), %r14
+; AVX2-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: andq %rax, %r14
+; AVX2-NEXT: orq %r15, %r14
+; AVX2-NEXT: shldq %cl, %rbp, %r9
+; AVX2-NEXT: movq (%rsp,%rbx), %rdx
+; AVX2-NEXT: movq 32(%rdi), %r15
+; AVX2-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: andq %r9, %r15
+; AVX2-NEXT: shlxq %rcx, %rdx, %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq (%rdi), %rbx
+; AVX2-NEXT: movq %rbx, %rbp
+; AVX2-NEXT: andq %rax, %rbp
+; AVX2-NEXT: orq %r15, %rbp
+; AVX2-NEXT: orq %r12, %rbp
+; AVX2-NEXT: # kill: def $cl killed $cl killed $rcx
+; AVX2-NEXT: shldq %cl, %rdx, %r11
+; AVX2-NEXT: movq 40(%rdi), %rax
+; AVX2-NEXT: movq %rax, %rcx
+; AVX2-NEXT: andq %r10, %rcx
+; AVX2-NEXT: movq 8(%rdi), %r15
+; AVX2-NEXT: movq %r15, %r12
+; AVX2-NEXT: andq %r11, %r12
+; AVX2-NEXT: orq %rcx, %r12
+; AVX2-NEXT: orq %r14, %r12
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; AVX2-NEXT: xorq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; AVX2-NEXT: xorq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; AVX2-NEXT: xorq %rax, %r10
+; AVX2-NEXT: xorq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload
+; AVX2-NEXT: xorq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
+; AVX2-NEXT: xorq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
+; AVX2-NEXT: xorq %r15, %r11
+; AVX2-NEXT: xorq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
+; AVX2-NEXT: orq %rbp, %r12
+; AVX2-NEXT: movq %r8, 48(%rdi)
+; AVX2-NEXT: movq %r13, 56(%rdi)
+; AVX2-NEXT: movq %r9, 32(%rdi)
+; AVX2-NEXT: movq %r10, 40(%rdi)
+; AVX2-NEXT: movq %rsi, 16(%rdi)
+; AVX2-NEXT: movq %rcx, 24(%rdi)
+; AVX2-NEXT: movq %rbx, (%rdi)
+; AVX2-NEXT: movq %r11, 8(%rdi)
+; AVX2-NEXT: setne %al
+; AVX2-NEXT: addq $72, %rsp
+; AVX2-NEXT: popq %rbx
+; AVX2-NEXT: popq %r12
+; AVX2-NEXT: popq %r13
+; AVX2-NEXT: popq %r14
+; AVX2-NEXT: popq %r15
+; AVX2-NEXT: popq %rbp
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: complement_ne_i512:
+; AVX512: # %bb.0:
+; AVX512-NEXT: pushq %rbp
+; AVX512-NEXT: pushq %r15
+; AVX512-NEXT: pushq %r14
+; AVX512-NEXT: pushq %r13
+; AVX512-NEXT: pushq %r12
+; AVX512-NEXT: pushq %rbx
+; AVX512-NEXT: subq $72, %rsp
+; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [1,0,0,0]
+; AVX512-NEXT: vmovups %ymm0, (%rsp)
+; AVX512-NEXT: movl %esi, %ecx
+; AVX512-NEXT: andl $63, %ecx
+; AVX512-NEXT: shrl $3, %esi
+; AVX512-NEXT: andl $56, %esi
+; AVX512-NEXT: negl %esi
+; AVX512-NEXT: movslq %esi, %rbx
+; AVX512-NEXT: movq 16(%rsp,%rbx), %rsi
+; AVX512-NEXT: movq 24(%rsp,%rbx), %rbp
+; AVX512-NEXT: movq %rbp, %rax
+; AVX512-NEXT: shldq %cl, %rsi, %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 48(%rsp,%rbx), %r8
+; AVX512-NEXT: movq 56(%rsp,%rbx), %r13
+; AVX512-NEXT: shldq %cl, %r8, %r13
+; AVX512-NEXT: movq 32(%rsp,%rbx), %r9
+; AVX512-NEXT: movq 40(%rsp,%rbx), %r14
+; AVX512-NEXT: movq %r14, %r10
+; AVX512-NEXT: shldq %cl, %r9, %r10
+; AVX512-NEXT: movq 8(%rsp,%rbx), %r11
+; AVX512-NEXT: shldq %cl, %r11, %rsi
+; AVX512-NEXT: shldq %cl, %r14, %r8
+; AVX512-NEXT: movq 16(%rdi), %r12
+; AVX512-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 48(%rdi), %r14
+; AVX512-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: andq %r8, %r14
+; AVX512-NEXT: andq %rsi, %r12
+; AVX512-NEXT: orq %r14, %r12
+; AVX512-NEXT: movq 56(%rdi), %r15
+; AVX512-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: andq %r13, %r15
+; AVX512-NEXT: movq 24(%rdi), %r14
+; AVX512-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: andq %rax, %r14
+; AVX512-NEXT: orq %r15, %r14
+; AVX512-NEXT: shldq %cl, %rbp, %r9
+; AVX512-NEXT: movq (%rsp,%rbx), %rdx
+; AVX512-NEXT: movq 32(%rdi), %r15
+; AVX512-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: andq %r9, %r15
+; AVX512-NEXT: shlxq %rcx, %rdx, %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq (%rdi), %rbx
+; AVX512-NEXT: movq %rbx, %rbp
+; AVX512-NEXT: andq %rax, %rbp
+; AVX512-NEXT: orq %r15, %rbp
+; AVX512-NEXT: orq %r12, %rbp
+; AVX512-NEXT: # kill: def $cl killed $cl killed $rcx
+; AVX512-NEXT: shldq %cl, %rdx, %r11
+; AVX512-NEXT: movq 40(%rdi), %rax
+; AVX512-NEXT: movq %rax, %rcx
+; AVX512-NEXT: andq %r10, %rcx
+; AVX512-NEXT: movq 8(%rdi), %r15
+; AVX512-NEXT: movq %r15, %r12
+; AVX512-NEXT: andq %r11, %r12
+; AVX512-NEXT: orq %rcx, %r12
+; AVX512-NEXT: orq %r14, %r12
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; AVX512-NEXT: xorq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; AVX512-NEXT: xorq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; AVX512-NEXT: xorq %rax, %r10
+; AVX512-NEXT: xorq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload
+; AVX512-NEXT: xorq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
+; AVX512-NEXT: xorq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
+; AVX512-NEXT: xorq %r15, %r11
+; AVX512-NEXT: xorq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
+; AVX512-NEXT: orq %rbp, %r12
+; AVX512-NEXT: movq %r8, 48(%rdi)
+; AVX512-NEXT: movq %r13, 56(%rdi)
+; AVX512-NEXT: movq %r9, 32(%rdi)
+; AVX512-NEXT: movq %r10, 40(%rdi)
+; AVX512-NEXT: movq %rsi, 16(%rdi)
+; AVX512-NEXT: movq %rcx, 24(%rdi)
+; AVX512-NEXT: movq %rbx, (%rdi)
+; AVX512-NEXT: movq %r11, 8(%rdi)
+; AVX512-NEXT: setne %al
+; AVX512-NEXT: addq $72, %rsp
+; AVX512-NEXT: popq %rbx
+; AVX512-NEXT: popq %r12
+; AVX512-NEXT: popq %r13
+; AVX512-NEXT: popq %r14
+; AVX512-NEXT: popq %r15
+; AVX512-NEXT: popq %rbp
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+ %rem = and i32 %position, 511
+ %ofs = zext nneg i32 %rem to i512
+ %bit = shl nuw i512 1, %ofs
+ %ld = load i512, ptr %word
+ %test = and i512 %ld, %bit
+ %res = xor i512 %ld, %bit
+ %cmp = icmp ne i512 %test, 0
+ store i512 %res, ptr %word
+ ret i1 %cmp
+}
+
+define i1 @reset_eq_i512(ptr %word, i32 %position) nounwind {
+; X86-LABEL: reset_eq_i512:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $288, %esp # imm = 0x120
+; X86-NEXT: movl 12(%ebp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: shrl $3, %eax
+; X86-NEXT: andl $60, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: leal {{[0-9]+}}(%esp), %edi
+; X86-NEXT: subl %eax, %edi
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 4(%edi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 8(%edi), %eax
+; X86-NEXT: andl $31, %ecx
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: shldl %cl, %edx, %ebx
+; X86-NEXT: movl 12(%edi), %edx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: shldl %cl, %eax, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 16(%edi), %eax
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: shldl %cl, %edx, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 20(%edi), %edx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: shldl %cl, %eax, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 24(%edi), %eax
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: shldl %cl, %edx, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 28(%edi), %edx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: shldl %cl, %eax, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 32(%edi), %eax
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: shldl %cl, %edx, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 36(%edi), %esi
+; X86-NEXT: movl %esi, %edx
+; X86-NEXT: shldl %cl, %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 40(%edi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %esi, %edx
+; X86-NEXT: movl 8(%ebp), %esi
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 40(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %eax, %edx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 8(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %eax, %ebx
+; X86-NEXT: orl %edx, %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 44(%edi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, %edx
+; X86-NEXT: movl 52(%edi), %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 56(%edi), %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NEXT: shldl %cl, %esi, %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 8(%ebp), %esi
+; X86-NEXT: movl 56(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %eax, %ebx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 24(%esi), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %ecx, %eax
+; X86-NEXT: orl %ebx, %eax
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, %ebx
+; X86-NEXT: movl 44(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %eax, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 12(%esi), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %ecx, %eax
+; X86-NEXT: orl %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 60(%edi), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: shldl %cl, %esi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 60(%ebx), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %edx, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 28(%ebx), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %edx, %esi
+; X86-NEXT: orl %eax, %esi
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl (%edi), %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: negl %eax
+; X86-NEXT: movl 256(%esp,%eax), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: shldl %cl, %edi, %eax
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NEXT: shll %cl, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %ebx, %esi
+; X86-NEXT: movl 32(%ebx), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %ecx, %edx
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl (%ebx), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %ecx, %edi
+; X86-NEXT: orl %edx, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 16(%esi), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %ecx, %ebx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 48(%esi), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %ecx, %eax
+; X86-NEXT: orl %ebx, %eax
+; X86-NEXT: orl %edi, %eax
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: shldl %cl, %edx, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 36(%esi), %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %ebx, %edx
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 4(%esi), %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %ebx, %edi
+; X86-NEXT: orl %edx, %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: shldl %cl, %edi, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 20(%esi), %edi
+; X86-NEXT: andl %edi, %ecx
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl 8(%ebp), %ebx
+; X86-NEXT: movl 52(%ebx), %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %ebx, %edx
+; X86-NEXT: orl %esi, %edx
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: notl %edx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: notl %edx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: notl %edx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: notl %ebx
+; X86-NEXT: andl %edi, %ebx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: notl %edx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: notl %edx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: notl %edx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: notl %edx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: notl %edx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: notl %edx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: notl %esi
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: notl %edx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: notl %edi
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: notl %edi
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: notl %edi
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT: notl %ecx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: orl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %edx, 60(%eax)
+; X86-NEXT: movl %esi, 56(%eax)
+; X86-NEXT: movl %ecx, 52(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 44(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 40(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 36(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 32(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 28(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 24(%eax)
+; X86-NEXT: movl %ebx, 20(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 16(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 8(%eax)
+; X86-NEXT: movl %edi, 4(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 48(%eax)
+; X86-NEXT: sete %al
+; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; SSE-LABEL: reset_eq_i512:
+; SSE: # %bb.0:
+; SSE-NEXT: pushq %rbp
+; SSE-NEXT: pushq %r15
+; SSE-NEXT: pushq %r14
+; SSE-NEXT: pushq %r13
+; SSE-NEXT: pushq %r12
+; SSE-NEXT: pushq %rbx
+; SSE-NEXT: subq $56, %rsp
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movq $0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movq $1, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movl %esi, %ecx
+; SSE-NEXT: andl $63, %ecx
+; SSE-NEXT: shrl $3, %esi
+; SSE-NEXT: andl $56, %esi
+; SSE-NEXT: negl %esi
+; SSE-NEXT: movslq %esi, %rdx
+; SSE-NEXT: movq (%rsp,%rdx), %r9
+; SSE-NEXT: movq 8(%rsp,%rdx), %r8
+; SSE-NEXT: movq %r8, %rsi
+; SSE-NEXT: shldq %cl, %r9, %rsi
+; SSE-NEXT: movq -8(%rsp,%rdx), %rax
+; SSE-NEXT: shldq %cl, %rax, %r9
+; SSE-NEXT: movq 16(%rsp,%rdx), %r14
+; SSE-NEXT: movq 24(%rsp,%rdx), %r10
+; SSE-NEXT: movq %r10, %rbx
+; SSE-NEXT: shldq %cl, %r14, %rbx
+; SSE-NEXT: shldq %cl, %r8, %r14
+; SSE-NEXT: movq 32(%rsp,%rdx), %r13
+; SSE-NEXT: movq 40(%rsp,%rdx), %r12
+; SSE-NEXT: shldq %cl, %r13, %r12
+; SSE-NEXT: shldq %cl, %r10, %r13
+; SSE-NEXT: movq -16(%rsp,%rdx), %rdx
+; SSE-NEXT: shldq %cl, %rdx, %rax
+; SSE-NEXT: # kill: def $cl killed $cl killed $ecx
+; SSE-NEXT: shlq %cl, %rdx
+; SSE-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq %r12, %rbp
+; SSE-NEXT: movq %r9, %r15
+; SSE-NEXT: movq %rsi, %r11
+; SSE-NEXT: movq 16(%rdi), %r8
+; SSE-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 48(%rdi), %rcx
+; SSE-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: andq %rcx, %r13
+; SSE-NEXT: andq %r8, %r9
+; SSE-NEXT: orq %r13, %r9
+; SSE-NEXT: movq 56(%rdi), %rcx
+; SSE-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: andq %rcx, %r12
+; SSE-NEXT: movq 24(%rdi), %r10
+; SSE-NEXT: andq %r10, %rsi
+; SSE-NEXT: orq %r12, %rsi
+; SSE-NEXT: movq %r14, %r13
+; SSE-NEXT: movq 32(%rdi), %rcx
+; SSE-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: andq %rcx, %r14
+; SSE-NEXT: movq %rdx, %r12
+; SSE-NEXT: movq (%rdi), %rcx
+; SSE-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: andq %rcx, %rdx
+; SSE-NEXT: orq %r14, %rdx
+; SSE-NEXT: orq %r9, %rdx
+; SSE-NEXT: movq %rbx, %r14
+; SSE-NEXT: movq 40(%rdi), %rcx
+; SSE-NEXT: andq %rcx, %rbx
+; SSE-NEXT: movq %rax, %r9
+; SSE-NEXT: movq 8(%rdi), %r8
+; SSE-NEXT: andq %r8, %rax
+; SSE-NEXT: orq %rbx, %rax
+; SSE-NEXT: orq %rsi, %rax
+; SSE-NEXT: notq %r11
+; SSE-NEXT: andq %r10, %r11
+; SSE-NEXT: notq %r15
+; SSE-NEXT: andq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
+; SSE-NEXT: notq %r14
+; SSE-NEXT: andq %rcx, %r14
+; SSE-NEXT: notq %r13
+; SSE-NEXT: andq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
+; SSE-NEXT: notq %rbp
+; SSE-NEXT: andq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; SSE-NEXT: notq %rcx
+; SSE-NEXT: andq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; SSE-NEXT: notq %r9
+; SSE-NEXT: andq %r8, %r9
+; SSE-NEXT: notq %r12
+; SSE-NEXT: andq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload
+; SSE-NEXT: orq %rdx, %rax
+; SSE-NEXT: movq %rcx, 48(%rdi)
+; SSE-NEXT: movq %rbp, 56(%rdi)
+; SSE-NEXT: movq %r13, 32(%rdi)
+; SSE-NEXT: movq %r14, 40(%rdi)
+; SSE-NEXT: movq %r15, 16(%rdi)
+; SSE-NEXT: movq %r11, 24(%rdi)
+; SSE-NEXT: movq %r12, (%rdi)
+; SSE-NEXT: movq %r9, 8(%rdi)
+; SSE-NEXT: sete %al
+; SSE-NEXT: addq $56, %rsp
+; SSE-NEXT: popq %rbx
+; SSE-NEXT: popq %r12
+; SSE-NEXT: popq %r13
+; SSE-NEXT: popq %r14
+; SSE-NEXT: popq %r15
+; SSE-NEXT: popq %rbp
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: reset_eq_i512:
+; AVX2: # %bb.0:
+; AVX2-NEXT: pushq %rbp
+; AVX2-NEXT: pushq %r15
+; AVX2-NEXT: pushq %r14
+; AVX2-NEXT: pushq %r13
+; AVX2-NEXT: pushq %r12
+; AVX2-NEXT: pushq %rbx
+; AVX2-NEXT: pushq %rax
+; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovss {{.*#+}} xmm0 = [1,0,0,0]
+; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: movl %esi, %ecx
+; AVX2-NEXT: andl $63, %ecx
+; AVX2-NEXT: shrl $3, %esi
+; AVX2-NEXT: andl $56, %esi
+; AVX2-NEXT: negl %esi
+; AVX2-NEXT: movslq %esi, %rdx
+; AVX2-NEXT: movq -48(%rsp,%rdx), %r8
+; AVX2-NEXT: movq -40(%rsp,%rdx), %rbx
+; AVX2-NEXT: movq %rbx, %rax
+; AVX2-NEXT: shldq %cl, %r8, %rax
+; AVX2-NEXT: movq -16(%rsp,%rdx), %r10
+; AVX2-NEXT: movq -8(%rsp,%rdx), %rsi
+; AVX2-NEXT: shldq %cl, %r10, %rsi
+; AVX2-NEXT: movq -32(%rsp,%rdx), %r11
+; AVX2-NEXT: movq -24(%rsp,%rdx), %r14
+; AVX2-NEXT: movq %r14, %r9
+; AVX2-NEXT: shldq %cl, %r11, %r9
+; AVX2-NEXT: movq -64(%rsp,%rdx), %r15
+; AVX2-NEXT: movq -56(%rsp,%rdx), %rdx
+; AVX2-NEXT: shldq %cl, %rdx, %r8
+; AVX2-NEXT: shldq %cl, %r14, %r10
+; AVX2-NEXT: shldq %cl, %rbx, %r11
+; AVX2-NEXT: shldq %cl, %r15, %rdx
+; AVX2-NEXT: shlxq %rcx, %r15, %rcx
+; AVX2-NEXT: movq 24(%rdi), %rbx
+; AVX2-NEXT: movq 56(%rdi), %r14
+; AVX2-NEXT: movq 16(%rdi), %r15
+; AVX2-NEXT: movq 48(%rdi), %r13
+; AVX2-NEXT: movq 32(%rdi), %rbp
+; AVX2-NEXT: andnq %rbp, %r11, %r12
+; AVX2-NEXT: andq %r11, %rbp
+; AVX2-NEXT: andnq %r13, %r10, %r11
+; AVX2-NEXT: andq %r10, %r13
+; AVX2-NEXT: andnq %r15, %r8, %r10
+; AVX2-NEXT: andq %r8, %r15
+; AVX2-NEXT: movq 40(%rdi), %r8
+; AVX2-NEXT: orq %r13, %r15
+; AVX2-NEXT: andnq %r8, %r9, %r13
+; AVX2-NEXT: andq %r9, %r8
+; AVX2-NEXT: andnq %r14, %rsi, %r9
+; AVX2-NEXT: andq %rsi, %r14
+; AVX2-NEXT: andnq %rbx, %rax, %rsi
+; AVX2-NEXT: andq %rax, %rbx
+; AVX2-NEXT: movq (%rdi), %rax
+; AVX2-NEXT: orq %r14, %rbx
+; AVX2-NEXT: andnq %rax, %rcx, %r14
+; AVX2-NEXT: andq %rcx, %rax
+; AVX2-NEXT: orq %rbp, %rax
+; AVX2-NEXT: movq 8(%rdi), %rcx
+; AVX2-NEXT: orq %r15, %rax
+; AVX2-NEXT: andnq %rcx, %rdx, %r15
+; AVX2-NEXT: andq %rdx, %rcx
+; AVX2-NEXT: orq %r8, %rcx
+; AVX2-NEXT: orq %rbx, %rcx
+; AVX2-NEXT: orq %rax, %rcx
+; AVX2-NEXT: movq %r11, 48(%rdi)
+; AVX2-NEXT: movq %r9, 56(%rdi)
+; AVX2-NEXT: movq %r12, 32(%rdi)
+; AVX2-NEXT: movq %r13, 40(%rdi)
+; AVX2-NEXT: movq %r10, 16(%rdi)
+; AVX2-NEXT: movq %rsi, 24(%rdi)
+; AVX2-NEXT: movq %r14, (%rdi)
+; AVX2-NEXT: movq %r15, 8(%rdi)
+; AVX2-NEXT: sete %al
+; AVX2-NEXT: addq $8, %rsp
+; AVX2-NEXT: popq %rbx
+; AVX2-NEXT: popq %r12
+; AVX2-NEXT: popq %r13
+; AVX2-NEXT: popq %r14
+; AVX2-NEXT: popq %r15
+; AVX2-NEXT: popq %rbp
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: reset_eq_i512:
+; AVX512: # %bb.0:
+; AVX512-NEXT: pushq %rbp
+; AVX512-NEXT: pushq %r15
+; AVX512-NEXT: pushq %r14
+; AVX512-NEXT: pushq %r13
+; AVX512-NEXT: pushq %r12
+; AVX512-NEXT: pushq %rbx
+; AVX512-NEXT: pushq %rax
+; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [1,0,0,0]
+; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: movl %esi, %ecx
+; AVX512-NEXT: andl $63, %ecx
+; AVX512-NEXT: shrl $3, %esi
+; AVX512-NEXT: andl $56, %esi
+; AVX512-NEXT: negl %esi
+; AVX512-NEXT: movslq %esi, %rbx
+; AVX512-NEXT: movq -48(%rsp,%rbx), %r8
+; AVX512-NEXT: movq -40(%rsp,%rbx), %r14
+; AVX512-NEXT: movq %r14, %rax
+; AVX512-NEXT: shldq %cl, %r8, %rax
+; AVX512-NEXT: movq -16(%rsp,%rbx), %r10
+; AVX512-NEXT: movq -8(%rsp,%rbx), %rsi
+; AVX512-NEXT: shldq %cl, %r10, %rsi
+; AVX512-NEXT: movq -32(%rsp,%rbx), %r11
+; AVX512-NEXT: movq -24(%rsp,%rbx), %r15
+; AVX512-NEXT: movq %r15, %r9
+; AVX512-NEXT: shldq %cl, %r11, %r9
+; AVX512-NEXT: movq -56(%rsp,%rbx), %rdx
+; AVX512-NEXT: shldq %cl, %rdx, %r8
+; AVX512-NEXT: shldq %cl, %r15, %r10
+; AVX512-NEXT: shldq %cl, %r14, %r11
+; AVX512-NEXT: movq -64(%rsp,%rbx), %rbx
+; AVX512-NEXT: shldq %cl, %rbx, %rdx
+; AVX512-NEXT: shlxq %rcx, %rbx, %rcx
+; AVX512-NEXT: movq 24(%rdi), %rbx
+; AVX512-NEXT: movq 56(%rdi), %r14
+; AVX512-NEXT: movq 16(%rdi), %r15
+; AVX512-NEXT: movq 48(%rdi), %r13
+; AVX512-NEXT: movq 32(%rdi), %rbp
+; AVX512-NEXT: andnq %rbp, %r11, %r12
+; AVX512-NEXT: andq %r11, %rbp
+; AVX512-NEXT: andnq %r13, %r10, %r11
+; AVX512-NEXT: andq %r10, %r13
+; AVX512-NEXT: andnq %r15, %r8, %r10
+; AVX512-NEXT: andq %r8, %r15
+; AVX512-NEXT: movq 40(%rdi), %r8
+; AVX512-NEXT: orq %r13, %r15
+; AVX512-NEXT: andnq %r8, %r9, %r13
+; AVX512-NEXT: andq %r9, %r8
+; AVX512-NEXT: andnq %r14, %rsi, %r9
+; AVX512-NEXT: andq %rsi, %r14
+; AVX512-NEXT: andnq %rbx, %rax, %rsi
+; AVX512-NEXT: andq %rax, %rbx
+; AVX512-NEXT: movq (%rdi), %rax
+; AVX512-NEXT: orq %r14, %rbx
+; AVX512-NEXT: andnq %rax, %rcx, %r14
+; AVX512-NEXT: andq %rcx, %rax
+; AVX512-NEXT: orq %rbp, %rax
+; AVX512-NEXT: movq 8(%rdi), %rcx
+; AVX512-NEXT: orq %r15, %rax
+; AVX512-NEXT: andnq %rcx, %rdx, %r15
+; AVX512-NEXT: andq %rdx, %rcx
+; AVX512-NEXT: orq %r8, %rcx
+; AVX512-NEXT: orq %rbx, %rcx
+; AVX512-NEXT: orq %rax, %rcx
+; AVX512-NEXT: movq %r11, 48(%rdi)
+; AVX512-NEXT: movq %r9, 56(%rdi)
+; AVX512-NEXT: movq %r12, 32(%rdi)
+; AVX512-NEXT: movq %r13, 40(%rdi)
+; AVX512-NEXT: movq %r10, 16(%rdi)
+; AVX512-NEXT: movq %rsi, 24(%rdi)
+; AVX512-NEXT: movq %r14, (%rdi)
+; AVX512-NEXT: movq %r15, 8(%rdi)
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: addq $8, %rsp
+; AVX512-NEXT: popq %rbx
+; AVX512-NEXT: popq %r12
+; AVX512-NEXT: popq %r13
+; AVX512-NEXT: popq %r14
+; AVX512-NEXT: popq %r15
+; AVX512-NEXT: popq %rbp
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+ %rem = and i32 %position, 511
+ %ofs = zext nneg i32 %rem to i512
+ %bit = shl nuw i512 1, %ofs
+ %mask = xor i512 %bit, -1
+ %ld = load i512, ptr %word
+ %test = and i512 %ld, %bit
+ %res = and i512 %ld, %mask
+ %cmp = icmp eq i512 %test, 0
+ store i512 %res, ptr %word
+ ret i1 %cmp
+}
+
+; Bit test-and-set on an i512 held in memory: sets bit (%position & 511) of the
+; value at %word, stores the result back, and returns true when that bit was
+; already set in the value loaded before the update.
+define i1 @set_ne_i512(ptr %word, i32 %position) nounwind {
+; X86-LABEL: set_ne_i512:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $272, %esp # imm = 0x110
+; X86-NEXT: movl 12(%ebp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: shrl $3, %eax
+; X86-NEXT: andl $60, %eax
+; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT: leal {{[0-9]+}}(%esp), %edx
+; X86-NEXT: subl %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 24(%edx), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 28(%edx), %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl $31, %ecx
+; X86-NEXT: shldl %cl, %eax, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 56(%edx), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 60(%edx), %esi
+; X86-NEXT: shldl %cl, %eax, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 8(%edx), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 12(%edx), %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %eax, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 40(%edx), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 44(%edx), %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %eax, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 16(%edx), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 20(%edx), %ebx
+; X86-NEXT: movl %ebx, %esi
+; X86-NEXT: shldl %cl, %eax, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 32(%edx), %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 36(%edx), %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edi, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl 52(%edx), %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: shldl %cl, %esi, %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 4(%edx), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 8(%ebp), %edx
+; X86-NEXT: movl 40(%edx), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %esi, %eax
+; X86-NEXT: movl 8(%edx), %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %ebx, %esi
+; X86-NEXT: orl %eax, %esi
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: movl 56(%edx), %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %edi, %ebx
+; X86-NEXT: movl 24(%edx), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: orl %ebx, %edx
+; X86-NEXT: orl %esi, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 44(%eax), %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: movl 12(%eax), %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT: orl %esi, %ebx
+; X86-NEXT: movl 60(%eax), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl 28(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: orl %eax, %edx
+; X86-NEXT: orl %ebx, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl (%eax), %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl (%esp), %eax # 4-byte Reload
+; X86-NEXT: negl %eax
+; X86-NEXT: movl 240(%esp,%eax), %esi
+; X86-NEXT: shldl %cl, %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: shldl %cl, %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, %esi
+; X86-NEXT: movl %esi, (%esp) # 4-byte Spill
+; X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NEXT: shll %cl, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 8(%ebp), %esi
+; X86-NEXT: movl 32(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %edi, %eax
+; X86-NEXT: movl (%esi), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %edx, %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: movl 16(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %ebx, %eax
+; X86-NEXT: movl 48(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl (%esp), %edx # 4-byte Folded Reload
+; X86-NEXT: orl %eax, %edx
+; X86-NEXT: orl %ecx, %edx
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: movl 36(%esi), %ebx
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl 4(%esi), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl 20(%esi), %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: andl %esi, %edi
+; X86-NEXT: movl 52(%eax), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: orl %edi, %eax
+; X86-NEXT: orl %ecx, %eax
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: orl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: orl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: orl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: orl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: orl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: orl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: orl %ecx, (%esp) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: orl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: orl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: orl %edx, %eax
+; X86-NEXT: movl 8(%ebp), %edx
+; X86-NEXT: movl %ebx, 60(%edx)
+; X86-NEXT: movl %edi, 56(%edx)
+; X86-NEXT: movl %ecx, 52(%edx)
+; X86-NEXT: movl %esi, 44(%edx)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, 40(%edx)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, 36(%edx)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, 32(%edx)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, 28(%edx)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, 24(%edx)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, 20(%edx)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, 16(%edx)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, 12(%edx)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, 8(%edx)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, 4(%edx)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, (%edx)
+; X86-NEXT: movl (%esp), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, 48(%edx)
+; X86-NEXT: setne %al
+; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; SSE-LABEL: set_ne_i512:
+; SSE: # %bb.0:
+; SSE-NEXT: pushq %rbp
+; SSE-NEXT: pushq %r15
+; SSE-NEXT: pushq %r14
+; SSE-NEXT: pushq %r13
+; SSE-NEXT: pushq %r12
+; SSE-NEXT: pushq %rbx
+; SSE-NEXT: subq $56, %rsp
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movq $0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movq $1, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movl %esi, %ecx
+; SSE-NEXT: andl $63, %ecx
+; SSE-NEXT: shrl $3, %esi
+; SSE-NEXT: andl $56, %esi
+; SSE-NEXT: negl %esi
+; SSE-NEXT: movslq %esi, %rbx
+; SSE-NEXT: movq (%rsp,%rbx), %rsi
+; SSE-NEXT: movq 8(%rsp,%rbx), %r14
+; SSE-NEXT: movq %r14, %rax
+; SSE-NEXT: shldq %cl, %rsi, %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 32(%rsp,%rbx), %r8
+; SSE-NEXT: movq 40(%rsp,%rbx), %rbp
+; SSE-NEXT: shldq %cl, %r8, %rbp
+; SSE-NEXT: movq 16(%rsp,%rbx), %r9
+; SSE-NEXT: movq 24(%rsp,%rbx), %r15
+; SSE-NEXT: movq %r15, %r10
+; SSE-NEXT: shldq %cl, %r9, %r10
+; SSE-NEXT: movq -8(%rsp,%rbx), %r11
+; SSE-NEXT: shldq %cl, %r11, %rsi
+; SSE-NEXT: shldq %cl, %r15, %r8
+; SSE-NEXT: shldq %cl, %r14, %r9
+; SSE-NEXT: movq -16(%rsp,%rbx), %rbx
+; SSE-NEXT: shldq %cl, %rbx, %r11
+; SSE-NEXT: # kill: def $cl killed $cl killed $ecx
+; SSE-NEXT: shlq %cl, %rbx
+; SSE-NEXT: movq 24(%rdi), %r15
+; SSE-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 56(%rdi), %rcx
+; SSE-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 16(%rdi), %r12
+; SSE-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 48(%rdi), %r13
+; SSE-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: andq %r8, %r13
+; SSE-NEXT: andq %rsi, %r12
+; SSE-NEXT: orq %r13, %r12
+; SSE-NEXT: movq %rcx, %r13
+; SSE-NEXT: andq %rbp, %r13
+; SSE-NEXT: andq %rax, %r15
+; SSE-NEXT: orq %r13, %r15
+; SSE-NEXT: movq 32(%rdi), %r14
+; SSE-NEXT: movq %r14, %rcx
+; SSE-NEXT: andq %r9, %rcx
+; SSE-NEXT: movq (%rdi), %r13
+; SSE-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: andq %rbx, %r13
+; SSE-NEXT: orq %rcx, %r13
+; SSE-NEXT: orq %r12, %r13
+; SSE-NEXT: movq 40(%rdi), %rcx
+; SSE-NEXT: movq %rcx, %r12
+; SSE-NEXT: andq %r10, %r12
+; SSE-NEXT: movq 8(%rdi), %rdx
+; SSE-NEXT: movq %rdx, %rax
+; SSE-NEXT: andq %r11, %rax
+; SSE-NEXT: orq %r12, %rax
+; SSE-NEXT: orq %r15, %rax
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
+; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; SSE-NEXT: orq %rcx, %r10
+; SSE-NEXT: orq %r14, %r9
+; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
+; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
+; SSE-NEXT: orq %rdx, %r11
+; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
+; SSE-NEXT: orq %r13, %rax
+; SSE-NEXT: movq %r8, 48(%rdi)
+; SSE-NEXT: movq %rbp, 56(%rdi)
+; SSE-NEXT: movq %r9, 32(%rdi)
+; SSE-NEXT: movq %r10, 40(%rdi)
+; SSE-NEXT: movq %rsi, 16(%rdi)
+; SSE-NEXT: movq %r15, 24(%rdi)
+; SSE-NEXT: movq %rbx, (%rdi)
+; SSE-NEXT: movq %r11, 8(%rdi)
+; SSE-NEXT: setne %al
+; SSE-NEXT: addq $56, %rsp
+; SSE-NEXT: popq %rbx
+; SSE-NEXT: popq %r12
+; SSE-NEXT: popq %r13
+; SSE-NEXT: popq %r14
+; SSE-NEXT: popq %r15
+; SSE-NEXT: popq %rbp
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: set_ne_i512:
+; AVX2: # %bb.0:
+; AVX2-NEXT: pushq %rbp
+; AVX2-NEXT: pushq %r15
+; AVX2-NEXT: pushq %r14
+; AVX2-NEXT: pushq %r13
+; AVX2-NEXT: pushq %r12
+; AVX2-NEXT: pushq %rbx
+; AVX2-NEXT: subq $72, %rsp
+; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovss {{.*#+}} xmm0 = [1,0,0,0]
+; AVX2-NEXT: vmovups %ymm0, (%rsp)
+; AVX2-NEXT: movl %esi, %ecx
+; AVX2-NEXT: andl $63, %ecx
+; AVX2-NEXT: shrl $3, %esi
+; AVX2-NEXT: andl $56, %esi
+; AVX2-NEXT: negl %esi
+; AVX2-NEXT: movslq %esi, %rbx
+; AVX2-NEXT: movq 16(%rsp,%rbx), %rsi
+; AVX2-NEXT: movq 24(%rsp,%rbx), %rbp
+; AVX2-NEXT: movq %rbp, %rax
+; AVX2-NEXT: shldq %cl, %rsi, %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 48(%rsp,%rbx), %r8
+; AVX2-NEXT: movq 56(%rsp,%rbx), %r13
+; AVX2-NEXT: shldq %cl, %r8, %r13
+; AVX2-NEXT: movq 32(%rsp,%rbx), %r9
+; AVX2-NEXT: movq 40(%rsp,%rbx), %r14
+; AVX2-NEXT: movq %r14, %r10
+; AVX2-NEXT: shldq %cl, %r9, %r10
+; AVX2-NEXT: movq 8(%rsp,%rbx), %r11
+; AVX2-NEXT: shldq %cl, %r11, %rsi
+; AVX2-NEXT: shldq %cl, %r14, %r8
+; AVX2-NEXT: movq 16(%rdi), %r12
+; AVX2-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 48(%rdi), %r14
+; AVX2-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: andq %r8, %r14
+; AVX2-NEXT: andq %rsi, %r12
+; AVX2-NEXT: orq %r14, %r12
+; AVX2-NEXT: movq 56(%rdi), %r15
+; AVX2-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: andq %r13, %r15
+; AVX2-NEXT: movq 24(%rdi), %r14
+; AVX2-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: andq %rax, %r14
+; AVX2-NEXT: orq %r15, %r14
+; AVX2-NEXT: shldq %cl, %rbp, %r9
+; AVX2-NEXT: movq (%rsp,%rbx), %rdx
+; AVX2-NEXT: movq 32(%rdi), %r15
+; AVX2-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: andq %r9, %r15
+; AVX2-NEXT: shlxq %rcx, %rdx, %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq (%rdi), %rbx
+; AVX2-NEXT: movq %rbx, %rbp
+; AVX2-NEXT: andq %rax, %rbp
+; AVX2-NEXT: orq %r15, %rbp
+; AVX2-NEXT: orq %r12, %rbp
+; AVX2-NEXT: # kill: def $cl killed $cl killed $rcx
+; AVX2-NEXT: shldq %cl, %rdx, %r11
+; AVX2-NEXT: movq 40(%rdi), %rax
+; AVX2-NEXT: movq %rax, %rcx
+; AVX2-NEXT: andq %r10, %rcx
+; AVX2-NEXT: movq 8(%rdi), %r15
+; AVX2-NEXT: movq %r15, %r12
+; AVX2-NEXT: andq %r11, %r12
+; AVX2-NEXT: orq %rcx, %r12
+; AVX2-NEXT: orq %r14, %r12
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; AVX2-NEXT: orq %rax, %r10
+; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload
+; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
+; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
+; AVX2-NEXT: orq %r15, %r11
+; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
+; AVX2-NEXT: orq %rbp, %r12
+; AVX2-NEXT: movq %r8, 48(%rdi)
+; AVX2-NEXT: movq %r13, 56(%rdi)
+; AVX2-NEXT: movq %r9, 32(%rdi)
+; AVX2-NEXT: movq %r10, 40(%rdi)
+; AVX2-NEXT: movq %rsi, 16(%rdi)
+; AVX2-NEXT: movq %rcx, 24(%rdi)
+; AVX2-NEXT: movq %rbx, (%rdi)
+; AVX2-NEXT: movq %r11, 8(%rdi)
+; AVX2-NEXT: setne %al
+; AVX2-NEXT: addq $72, %rsp
+; AVX2-NEXT: popq %rbx
+; AVX2-NEXT: popq %r12
+; AVX2-NEXT: popq %r13
+; AVX2-NEXT: popq %r14
+; AVX2-NEXT: popq %r15
+; AVX2-NEXT: popq %rbp
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: set_ne_i512:
+; AVX512: # %bb.0:
+; AVX512-NEXT: pushq %rbp
+; AVX512-NEXT: pushq %r15
+; AVX512-NEXT: pushq %r14
+; AVX512-NEXT: pushq %r13
+; AVX512-NEXT: pushq %r12
+; AVX512-NEXT: pushq %rbx
+; AVX512-NEXT: subq $72, %rsp
+; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [1,0,0,0]
+; AVX512-NEXT: vmovups %ymm0, (%rsp)
+; AVX512-NEXT: movl %esi, %ecx
+; AVX512-NEXT: andl $63, %ecx
+; AVX512-NEXT: shrl $3, %esi
+; AVX512-NEXT: andl $56, %esi
+; AVX512-NEXT: negl %esi
+; AVX512-NEXT: movslq %esi, %rbx
+; AVX512-NEXT: movq 16(%rsp,%rbx), %rsi
+; AVX512-NEXT: movq 24(%rsp,%rbx), %rbp
+; AVX512-NEXT: movq %rbp, %rax
+; AVX512-NEXT: shldq %cl, %rsi, %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 48(%rsp,%rbx), %r8
+; AVX512-NEXT: movq 56(%rsp,%rbx), %r13
+; AVX512-NEXT: shldq %cl, %r8, %r13
+; AVX512-NEXT: movq 32(%rsp,%rbx), %r9
+; AVX512-NEXT: movq 40(%rsp,%rbx), %r14
+; AVX512-NEXT: movq %r14, %r10
+; AVX512-NEXT: shldq %cl, %r9, %r10
+; AVX512-NEXT: movq 8(%rsp,%rbx), %r11
+; AVX512-NEXT: shldq %cl, %r11, %rsi
+; AVX512-NEXT: shldq %cl, %r14, %r8
+; AVX512-NEXT: movq 16(%rdi), %r12
+; AVX512-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 48(%rdi), %r14
+; AVX512-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: andq %r8, %r14
+; AVX512-NEXT: andq %rsi, %r12
+; AVX512-NEXT: orq %r14, %r12
+; AVX512-NEXT: movq 56(%rdi), %r15
+; AVX512-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: andq %r13, %r15
+; AVX512-NEXT: movq 24(%rdi), %r14
+; AVX512-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: andq %rax, %r14
+; AVX512-NEXT: orq %r15, %r14
+; AVX512-NEXT: shldq %cl, %rbp, %r9
+; AVX512-NEXT: movq (%rsp,%rbx), %rdx
+; AVX512-NEXT: movq 32(%rdi), %r15
+; AVX512-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: andq %r9, %r15
+; AVX512-NEXT: shlxq %rcx, %rdx, %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq (%rdi), %rbx
+; AVX512-NEXT: movq %rbx, %rbp
+; AVX512-NEXT: andq %rax, %rbp
+; AVX512-NEXT: orq %r15, %rbp
+; AVX512-NEXT: orq %r12, %rbp
+; AVX512-NEXT: # kill: def $cl killed $cl killed $rcx
+; AVX512-NEXT: shldq %cl, %rdx, %r11
+; AVX512-NEXT: movq 40(%rdi), %rax
+; AVX512-NEXT: movq %rax, %rcx
+; AVX512-NEXT: andq %r10, %rcx
+; AVX512-NEXT: movq 8(%rdi), %r15
+; AVX512-NEXT: movq %r15, %r12
+; AVX512-NEXT: andq %r11, %r12
+; AVX512-NEXT: orq %rcx, %r12
+; AVX512-NEXT: orq %r14, %r12
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; AVX512-NEXT: orq %rax, %r10
+; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload
+; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
+; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
+; AVX512-NEXT: orq %r15, %r11
+; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
+; AVX512-NEXT: orq %rbp, %r12
+; AVX512-NEXT: movq %r8, 48(%rdi)
+; AVX512-NEXT: movq %r13, 56(%rdi)
+; AVX512-NEXT: movq %r9, 32(%rdi)
+; AVX512-NEXT: movq %r10, 40(%rdi)
+; AVX512-NEXT: movq %rsi, 16(%rdi)
+; AVX512-NEXT: movq %rcx, 24(%rdi)
+; AVX512-NEXT: movq %rbx, (%rdi)
+; AVX512-NEXT: movq %r11, 8(%rdi)
+; AVX512-NEXT: setne %al
+; AVX512-NEXT: addq $72, %rsp
+; AVX512-NEXT: popq %rbx
+; AVX512-NEXT: popq %r12
+; AVX512-NEXT: popq %r13
+; AVX512-NEXT: popq %r14
+; AVX512-NEXT: popq %r15
+; AVX512-NEXT: popq %rbp
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+ ; Keep only the low 9 bits of %position so the bit index is in [0, 511].
+ %rem = and i32 %position, 511
+ %ofs = zext nneg i32 %rem to i512
+ ; Build the single-bit i512 mask 1 << %ofs.
+ %bit = shl nuw i512 1, %ofs
+ %ld = load i512, ptr %word
+ ; %test isolates the addressed bit of the loaded value; %res is the loaded
+ ; value with that bit set.
+ %test = and i512 %ld, %bit
+ %res = or i512 %ld, %bit
+ ; The result is true when the addressed bit was non-zero before the store.
+ %cmp = icmp ne i512 %test, 0
+ store i512 %res, ptr %word
+ ret i1 %cmp
+}
+
+define i1 @init_eq_i512(ptr %word, i32 %position, i1 zeroext %value) nounwind {
+; X86-LABEL: init_eq_i512:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $432, %esp # imm = 0x1B0
+; X86-NEXT: movl 12(%ebp), %ecx
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: shrl $3, %edx
+; X86-NEXT: andl $60, %edx
+; X86-NEXT: leal {{[0-9]+}}(%esp), %esi
+; X86-NEXT: subl %edx, %esi
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 56(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 60(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 52(%esi), %eax
+; X86-NEXT: movl 48(%esi), %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 40(%esi), %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 44(%esi), %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 36(%esi), %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 32(%esi), %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 28(%esi), %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 24(%esi), %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 20(%esi), %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 16(%esi), %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 12(%esi), %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 8(%esi), %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl (%esi), %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 4(%esi), %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movzbl 16(%ebp), %ebx
+; X86-NEXT: movzbl %bl, %esi
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: leal {{[0-9]+}}(%esp), %esi
+; X86-NEXT: subl %edx, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl $31, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: shldl %cl, %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: shldl %cl, %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: shldl %cl, %ebx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: shldl %cl, %esi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: shldl %cl, %edx, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: shldl %cl, %esi, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NEXT: movl %ebx, %edx
+; X86-NEXT: shldl %cl, %edi, %edx
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 8(%ebp), %ebx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 48(%ebx), %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %esi, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 16(%ebx), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %eax, %esi
+; X86-NEXT: orl %edx, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 40(%ebx), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %eax, %edx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 8(%ebx), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %edx, %eax
+; X86-NEXT: orl %esi, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 56(%ebx), %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %esi, %edx
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 24(%ebx), %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %esi, %edx
+; X86-NEXT: orl %edi, %edx
+; X86-NEXT: orl %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 52(%ebx), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %edx, %eax
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 20(%ebx), %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %esi, %eax
+; X86-NEXT: orl %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 44(%ebx), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %edx, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 12(%ebx), %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %esi, %edi
+; X86-NEXT: orl %eax, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 60(%ebx), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %edx, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 28(%ebx), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %eax, %edx
+; X86-NEXT: orl %esi, %edx
+; X86-NEXT: orl %edi, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, %edx
+; X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NEXT: shll %cl, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 32(%ebx), %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %esi, %ecx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl (%ebx), %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %edi, %eax
+; X86-NEXT: orl %ecx, %eax
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 36(%ebx), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %ecx, %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 4(%ebx), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %edx, %eax
+; X86-NEXT: orl %ecx, %eax
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: notl %edx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: movl 56(%edi), %ebx
+; X86-NEXT: movl 60(%edi), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: shldl %cl, %ebx, %eax
+; X86-NEXT: orl %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: notl %edx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: movl 52(%edi), %eax
+; X86-NEXT: shldl %cl, %eax, %ebx
+; X86-NEXT: orl %ebx, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: notl %edx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: movl 48(%edi), %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %esi, %eax
+; X86-NEXT: orl %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: notl %eax
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: movl 40(%edi), %ebx
+; X86-NEXT: movl 44(%edi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %ebx, %eax
+; X86-NEXT: orl %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: notl %edx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: movl 36(%edi), %eax
+; X86-NEXT: shldl %cl, %eax, %ebx
+; X86-NEXT: orl %ebx, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: notl %edx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: movl 32(%edi), %ebx
+; X86-NEXT: shldl %cl, %ebx, %eax
+; X86-NEXT: orl %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: notl %edx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: movl 28(%edi), %eax
+; X86-NEXT: shldl %cl, %eax, %ebx
+; X86-NEXT: orl %ebx, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: notl %edx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: movl 24(%edi), %ebx
+; X86-NEXT: shldl %cl, %ebx, %eax
+; X86-NEXT: orl %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: notl %edx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: movl 20(%edi), %eax
+; X86-NEXT: shldl %cl, %eax, %ebx
+; X86-NEXT: orl %ebx, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: notl %edx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: movl 16(%edi), %ebx
+; X86-NEXT: shldl %cl, %ebx, %eax
+; X86-NEXT: orl %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: notl %edx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: movl 12(%edi), %eax
+; X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NEXT: shldl %cl, %eax, %ebx
+; X86-NEXT: orl %ebx, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: notl %esi
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: movl 8(%edi), %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: orl %eax, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: notl %eax
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl 4(%edi), %ebx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: shldl %cl, %ebx, %edx
+; X86-NEXT: orl %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: notl %edx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: shldl %cl, %esi, %eax
+; X86-NEXT: orl %eax, %edx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: notl %edx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: movl (%edi), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, %ebx
+; X86-NEXT: orl %ebx, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: notl %edi
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NEXT: shll %cl, %eax
+; X86-NEXT: orl %eax, %edi
+; X86-NEXT: movl %edi, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: movl %edi, 60(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: movl %edi, 56(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: movl %edi, 52(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: movl %edi, 44(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: movl %edi, 40(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: movl %edi, 36(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: movl %edi, 32(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: movl %edi, 28(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: movl %edi, 24(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: movl %edi, 20(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: movl %edi, 16(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: movl %edi, 12(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: movl %edi, 8(%eax)
+; X86-NEXT: movl %edx, 4(%eax)
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: movl %esi, 48(%eax)
+; X86-NEXT: sete %al
+; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; SSE-LABEL: init_eq_i512:
+; SSE: # %bb.0:
+; SSE-NEXT: pushq %rbp
+; SSE-NEXT: pushq %r15
+; SSE-NEXT: pushq %r14
+; SSE-NEXT: pushq %r13
+; SSE-NEXT: pushq %r12
+; SSE-NEXT: pushq %rbx
+; SSE-NEXT: subq $216, %rsp
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movq $0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movq $1, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movl %esi, %ecx
+; SSE-NEXT: andl $63, %ecx
+; SSE-NEXT: shrl $3, %esi
+; SSE-NEXT: andl $56, %esi
+; SSE-NEXT: negl %esi
+; SSE-NEXT: movslq %esi, %r10
+; SSE-NEXT: movq 184(%rsp,%r10), %r11
+; SSE-NEXT: movq 192(%rsp,%r10), %rsi
+; SSE-NEXT: movq %rsi, %r13
+; SSE-NEXT: shldq %cl, %r11, %r13
+; SSE-NEXT: movq 200(%rsp,%r10), %r15
+; SSE-NEXT: shldq %cl, %rsi, %r15
+; SSE-NEXT: movq 168(%rsp,%r10), %rbx
+; SSE-NEXT: movq 176(%rsp,%r10), %rsi
+; SSE-NEXT: movq %rsi, %r14
+; SSE-NEXT: shldq %cl, %rbx, %r14
+; SSE-NEXT: shldq %cl, %rsi, %r11
+; SSE-NEXT: movq 152(%rsp,%r10), %rax
+; SSE-NEXT: movq 160(%rsp,%r10), %r8
+; SSE-NEXT: movq %r8, %r12
+; SSE-NEXT: shldq %cl, %rax, %r12
+; SSE-NEXT: shldq %cl, %r8, %rbx
+; SSE-NEXT: movq 144(%rsp,%r10), %r9
+; SSE-NEXT: movq %r9, %r8
+; SSE-NEXT: shlq %cl, %r8
+; SSE-NEXT: shldq %cl, %r9, %rax
+; SSE-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movl %edx, %edx
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, (%rsp)
+; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movq $0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movq 16(%rdi), %rdx
+; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 48(%rdi), %rsi
+; SSE-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: andq %rsi, %r13
+; SSE-NEXT: andq %rdx, %r12
+; SSE-NEXT: orq %r13, %r12
+; SSE-NEXT: movq %r15, %rsi
+; SSE-NEXT: movq 56(%rdi), %rdx
+; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: andq %rdx, %r15
+; SSE-NEXT: movq %rbx, %r13
+; SSE-NEXT: movq 24(%rdi), %rdx
+; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: andq %rdx, %rbx
+; SSE-NEXT: orq %r15, %rbx
+; SSE-NEXT: movq %r14, %rbp
+; SSE-NEXT: movq 32(%rdi), %rdx
+; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: andq %rdx, %r14
+; SSE-NEXT: movq %r8, %r15
+; SSE-NEXT: movq (%rdi), %rdx
+; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: andq %rdx, %r8
+; SSE-NEXT: orq %r14, %r8
+; SSE-NEXT: orq %r12, %r8
+; SSE-NEXT: movq %r11, %r12
+; SSE-NEXT: movq 40(%rdi), %r9
+; SSE-NEXT: andq %r9, %r11
+; SSE-NEXT: movq %rax, %r14
+; SSE-NEXT: movq 8(%rdi), %rdx
+; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: andq %rdx, %rax
+; SSE-NEXT: orq %r11, %rax
+; SSE-NEXT: orq %rbx, %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; SSE-NEXT: notq %rax
+; SSE-NEXT: andq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; SSE-NEXT: movq %rax, %rdx
+; SSE-NEXT: movq 56(%rsp,%r10), %r11
+; SSE-NEXT: movq 64(%rsp,%r10), %rax
+; SSE-NEXT: movq %rax, %rbx
+; SSE-NEXT: shldq %cl, %r11, %rbx
+; SSE-NEXT: orq %rbx, %rdx
+; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: notq %rsi
+; SSE-NEXT: movq 72(%rsp,%r10), %rbx
+; SSE-NEXT: shldq %cl, %rax, %rbx
+; SSE-NEXT: andq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; SSE-NEXT: orq %rbx, %rsi
+; SSE-NEXT: notq %rbp
+; SSE-NEXT: andq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
+; SSE-NEXT: movq 40(%rsp,%r10), %rax
+; SSE-NEXT: movq 48(%rsp,%r10), %rdx
+; SSE-NEXT: movq %rdx, %rbx
+; SSE-NEXT: shldq %cl, %rax, %rbx
+; SSE-NEXT: orq %rbx, %rbp
+; SSE-NEXT: notq %r12
+; SSE-NEXT: andq %r9, %r12
+; SSE-NEXT: shldq %cl, %rdx, %r11
+; SSE-NEXT: movq 24(%rsp,%r10), %r9
+; SSE-NEXT: movq 32(%rsp,%r10), %rdx
+; SSE-NEXT: movq %rdx, %rbx
+; SSE-NEXT: shldq %cl, %r9, %rbx
+; SSE-NEXT: orq %r11, %r12
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; SSE-NEXT: notq %r11
+; SSE-NEXT: andq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload
+; SSE-NEXT: shldq %cl, %rdx, %rax
+; SSE-NEXT: orq %rbx, %r11
+; SSE-NEXT: notq %r13
+; SSE-NEXT: andq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
+; SSE-NEXT: orq %rax, %r13
+; SSE-NEXT: notq %r15
+; SSE-NEXT: andq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
+; SSE-NEXT: movq 16(%rsp,%r10), %rax
+; SSE-NEXT: movq %rax, %rdx
+; SSE-NEXT: shlq %cl, %rdx
+; SSE-NEXT: orq %rdx, %r15
+; SSE-NEXT: notq %r14
+; SSE-NEXT: andq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
+; SSE-NEXT: # kill: def $cl killed $cl killed $ecx
+; SSE-NEXT: shldq %cl, %rax, %r9
+; SSE-NEXT: orq %r9, %r14
+; SSE-NEXT: orq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; SSE-NEXT: movq %rax, 48(%rdi)
+; SSE-NEXT: movq %rsi, 56(%rdi)
+; SSE-NEXT: movq %rbp, 32(%rdi)
+; SSE-NEXT: movq %r12, 40(%rdi)
+; SSE-NEXT: movq %r11, 16(%rdi)
+; SSE-NEXT: movq %r13, 24(%rdi)
+; SSE-NEXT: movq %r15, (%rdi)
+; SSE-NEXT: movq %r14, 8(%rdi)
+; SSE-NEXT: sete %al
+; SSE-NEXT: addq $216, %rsp
+; SSE-NEXT: popq %rbx
+; SSE-NEXT: popq %r12
+; SSE-NEXT: popq %r13
+; SSE-NEXT: popq %r14
+; SSE-NEXT: popq %r15
+; SSE-NEXT: popq %rbp
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: init_eq_i512:
+; AVX2: # %bb.0:
+; AVX2-NEXT: pushq %rbp
+; AVX2-NEXT: pushq %r15
+; AVX2-NEXT: pushq %r14
+; AVX2-NEXT: pushq %r13
+; AVX2-NEXT: pushq %r12
+; AVX2-NEXT: pushq %rbx
+; AVX2-NEXT: subq $200, %rsp
+; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovss {{.*#+}} xmm1 = [1,0,0,0]
+; AVX2-NEXT: vmovups %ymm1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: movl %esi, %r8d
+; AVX2-NEXT: andl $63, %r8d
+; AVX2-NEXT: shrl $3, %esi
+; AVX2-NEXT: andl $56, %esi
+; AVX2-NEXT: negl %esi
+; AVX2-NEXT: movslq %esi, %rsi
+; AVX2-NEXT: movq 144(%rsp,%rsi), %r11
+; AVX2-NEXT: movq 152(%rsp,%rsi), %r12
+; AVX2-NEXT: movq %r12, %r10
+; AVX2-NEXT: movl %r8d, %ecx
+; AVX2-NEXT: shldq %cl, %r11, %r10
+; AVX2-NEXT: movq 176(%rsp,%rsi), %r14
+; AVX2-NEXT: movq 184(%rsp,%rsi), %r9
+; AVX2-NEXT: shldq %cl, %r14, %r9
+; AVX2-NEXT: movq 160(%rsp,%rsi), %r15
+; AVX2-NEXT: movq 168(%rsp,%rsi), %r13
+; AVX2-NEXT: movq %r13, %rbx
+; AVX2-NEXT: shldq %cl, %r15, %rbx
+; AVX2-NEXT: movq 128(%rsp,%rsi), %rbp
+; AVX2-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 136(%rsp,%rsi), %rax
+; AVX2-NEXT: shldq %cl, %rax, %r11
+; AVX2-NEXT: shldq %cl, %r13, %r14
+; AVX2-NEXT: shldq %cl, %r12, %r15
+; AVX2-NEXT: shldq %cl, %rbp, %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movl %edx, %edx
+; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vmovups %xmm1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: movq %rdx, (%rsp)
+; AVX2-NEXT: movq $0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: movq 16(%rdi), %r12
+; AVX2-NEXT: movq 48(%rdi), %rbp
+; AVX2-NEXT: movq 32(%rdi), %r13
+; AVX2-NEXT: andnq %r13, %r15, %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: andq %r15, %r13
+; AVX2-NEXT: andnq %rbp, %r14, %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: andq %r14, %rbp
+; AVX2-NEXT: andnq %r12, %r11, %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: andq %r11, %r12
+; AVX2-NEXT: movq 40(%rdi), %rax
+; AVX2-NEXT: orq %rbp, %r12
+; AVX2-NEXT: andnq %rax, %rbx, %rcx
+; AVX2-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq %rax, %rbp
+; AVX2-NEXT: andq %rbx, %rbp
+; AVX2-NEXT: movq 56(%rdi), %rcx
+; AVX2-NEXT: andnq %rcx, %r9, %rbx
+; AVX2-NEXT: andq %r9, %rcx
+; AVX2-NEXT: movq 24(%rdi), %rax
+; AVX2-NEXT: andnq %rax, %r10, %rdx
+; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: andq %r10, %rax
+; AVX2-NEXT: orq %rcx, %rax
+; AVX2-NEXT: shlxq %r8, {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; AVX2-NEXT: movq (%rdi), %r10
+; AVX2-NEXT: andnq %r10, %rcx, %r15
+; AVX2-NEXT: andq %rcx, %r10
+; AVX2-NEXT: movq 40(%rsp,%rsi), %rdx
+; AVX2-NEXT: movq 48(%rsp,%rsi), %r11
+; AVX2-NEXT: movq %r11, %r9
+; AVX2-NEXT: movl %r8d, %ecx
+; AVX2-NEXT: shldq %cl, %rdx, %r9
+; AVX2-NEXT: orq %r13, %r10
+; AVX2-NEXT: orq %r12, %r10
+; AVX2-NEXT: movq 8(%rdi), %r13
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; AVX2-NEXT: andnq %r13, %rcx, %r12
+; AVX2-NEXT: andq %rcx, %r13
+; AVX2-NEXT: orq %rbp, %r13
+; AVX2-NEXT: orq %rax, %r13
+; AVX2-NEXT: movq 56(%rsp,%rsi), %rax
+; AVX2-NEXT: movl %r8d, %ecx
+; AVX2-NEXT: shldq %cl, %r11, %rax
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; AVX2-NEXT: orq %r9, %r14
+; AVX2-NEXT: orq %rax, %rbx
+; AVX2-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 24(%rsp,%rsi), %rax
+; AVX2-NEXT: movq 32(%rsp,%rsi), %r9
+; AVX2-NEXT: movq %r9, %r11
+; AVX2-NEXT: shldq %cl, %rax, %r11
+; AVX2-NEXT: shldq %cl, %r9, %rdx
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; AVX2-NEXT: orq %r11, %rbp
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; AVX2-NEXT: orq %rdx, %rbx
+; AVX2-NEXT: movq 8(%rsp,%rsi), %rdx
+; AVX2-NEXT: movq 16(%rsp,%rsi), %r9
+; AVX2-NEXT: movq %r9, %r11
+; AVX2-NEXT: shldq %cl, %rdx, %r11
+; AVX2-NEXT: shldq %cl, %r9, %rax
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; AVX2-NEXT: orq %r11, %r9
+; AVX2-NEXT: movq (%rsp,%rsi), %rsi
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; AVX2-NEXT: orq %rax, %r11
+; AVX2-NEXT: shlxq %r8, %rsi, %rax
+; AVX2-NEXT: shldq %cl, %rsi, %rdx
+; AVX2-NEXT: orq %rax, %r15
+; AVX2-NEXT: orq %rdx, %r12
+; AVX2-NEXT: orq %r10, %r13
+; AVX2-NEXT: movq %r14, 48(%rdi)
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX2-NEXT: movq %rax, 56(%rdi)
+; AVX2-NEXT: movq %rbp, 32(%rdi)
+; AVX2-NEXT: movq %rbx, 40(%rdi)
+; AVX2-NEXT: movq %r9, 16(%rdi)
+; AVX2-NEXT: movq %r11, 24(%rdi)
+; AVX2-NEXT: movq %r15, (%rdi)
+; AVX2-NEXT: movq %r12, 8(%rdi)
+; AVX2-NEXT: sete %al
+; AVX2-NEXT: addq $200, %rsp
+; AVX2-NEXT: popq %rbx
+; AVX2-NEXT: popq %r12
+; AVX2-NEXT: popq %r13
+; AVX2-NEXT: popq %r14
+; AVX2-NEXT: popq %r15
+; AVX2-NEXT: popq %rbp
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: init_eq_i512:
+; AVX512: # %bb.0:
+; AVX512-NEXT: pushq %rbp
+; AVX512-NEXT: pushq %r15
+; AVX512-NEXT: pushq %r14
+; AVX512-NEXT: pushq %r13
+; AVX512-NEXT: pushq %r12
+; AVX512-NEXT: pushq %rbx
+; AVX512-NEXT: subq $184, %rsp
+; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovaps {{.*#+}} xmm1 = [1,0,0,0]
+; AVX512-NEXT: vmovups %ymm1, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: movl %esi, %ecx
+; AVX512-NEXT: andl $63, %ecx
+; AVX512-NEXT: shrl $3, %esi
+; AVX512-NEXT: andl $56, %esi
+; AVX512-NEXT: negl %esi
+; AVX512-NEXT: movslq %esi, %rsi
+; AVX512-NEXT: movq 128(%rsp,%rsi), %r10
+; AVX512-NEXT: movq 136(%rsp,%rsi), %r12
+; AVX512-NEXT: movq %r12, %rax
+; AVX512-NEXT: shldq %cl, %r10, %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 160(%rsp,%rsi), %r14
+; AVX512-NEXT: movq 168(%rsp,%rsi), %rax
+; AVX512-NEXT: shldq %cl, %r14, %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 144(%rsp,%rsi), %r15
+; AVX512-NEXT: movq 152(%rsp,%rsi), %r11
+; AVX512-NEXT: movq %r11, %rbx
+; AVX512-NEXT: shldq %cl, %r15, %rbx
+; AVX512-NEXT: movq 120(%rsp,%rsi), %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: shldq %cl, %rax, %r10
+; AVX512-NEXT: shldq %cl, %r11, %r14
+; AVX512-NEXT: movq %rdi, %r9
+; AVX512-NEXT: movq 112(%rsp,%rsi), %r11
+; AVX512-NEXT: shldq %cl, %r12, %r15
+; AVX512-NEXT: movl %edx, %edx
+; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vmovups %xmm1, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: movq $0, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: movq 16(%rdi), %r12
+; AVX512-NEXT: movq 48(%rdi), %r13
+; AVX512-NEXT: movq 32(%rdi), %rbp
+; AVX512-NEXT: andnq %rbp, %r15, %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: andq %r15, %rbp
+; AVX512-NEXT: andnq %r13, %r14, %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: andq %r14, %r13
+; AVX512-NEXT: andnq %r12, %r10, %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: andq %r10, %r12
+; AVX512-NEXT: movq 40(%rdi), %r8
+; AVX512-NEXT: orq %r13, %r12
+; AVX512-NEXT: andnq %r8, %rbx, %rdi
+; AVX512-NEXT: andq %rbx, %r8
+; AVX512-NEXT: movq 56(%r9), %r13
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; AVX512-NEXT: andnq %r13, %rdx, %r10
+; AVX512-NEXT: andq %rdx, %r13
+; AVX512-NEXT: movq 24(%r9), %rax
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; AVX512-NEXT: andnq %rax, %rdx, %r15
+; AVX512-NEXT: andq %rdx, %rax
+; AVX512-NEXT: orq %r13, %rax
+; AVX512-NEXT: shlxq %rcx, %r11, %r13
+; AVX512-NEXT: movq (%r9), %rdx
+; AVX512-NEXT: andnq %rdx, %r13, %r14
+; AVX512-NEXT: andq %r13, %rdx
+; AVX512-NEXT: orq %rbp, %rdx
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; AVX512-NEXT: shldq %cl, %r11, %rbp
+; AVX512-NEXT: orq %r12, %rdx
+; AVX512-NEXT: movq 8(%r9), %r13
+; AVX512-NEXT: andnq %r13, %rbp, %rbx
+; AVX512-NEXT: andq %rbp, %r13
+; AVX512-NEXT: orq %r8, %r13
+; AVX512-NEXT: movq 24(%rsp,%rsi), %r8
+; AVX512-NEXT: orq %rax, %r13
+; AVX512-NEXT: movq 32(%rsp,%rsi), %rax
+; AVX512-NEXT: movq %rax, %r12
+; AVX512-NEXT: shldq %cl, %r8, %r12
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; AVX512-NEXT: orq %r12, %r11
+; AVX512-NEXT: movq 40(%rsp,%rsi), %r12
+; AVX512-NEXT: shldq %cl, %rax, %r12
+; AVX512-NEXT: orq %r12, %r10
+; AVX512-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 8(%rsp,%rsi), %rax
+; AVX512-NEXT: movq 16(%rsp,%rsi), %r12
+; AVX512-NEXT: movq %r12, %rbp
+; AVX512-NEXT: shldq %cl, %rax, %rbp
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; AVX512-NEXT: orq %rbp, %r10
+; AVX512-NEXT: shldq %cl, %r12, %r8
+; AVX512-NEXT: orq %r8, %rdi
+; AVX512-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq -8(%rsp,%rsi), %r8
+; AVX512-NEXT: movq (%rsp,%rsi), %r12
+; AVX512-NEXT: movq %r12, %rbp
+; AVX512-NEXT: shldq %cl, %r8, %rbp
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; AVX512-NEXT: orq %rbp, %rdi
+; AVX512-NEXT: movq -16(%rsp,%rsi), %rsi
+; AVX512-NEXT: shldq %cl, %r12, %rax
+; AVX512-NEXT: orq %rax, %r15
+; AVX512-NEXT: shlxq %rcx, %rsi, %rax
+; AVX512-NEXT: # kill: def $cl killed $cl killed $rcx
+; AVX512-NEXT: shldq %cl, %rsi, %r8
+; AVX512-NEXT: orq %rax, %r14
+; AVX512-NEXT: orq %r8, %rbx
+; AVX512-NEXT: orq %rdx, %r13
+; AVX512-NEXT: movq %r11, 48(%r9)
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX512-NEXT: movq %rax, 56(%r9)
+; AVX512-NEXT: movq %r10, 32(%r9)
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX512-NEXT: movq %rax, 40(%r9)
+; AVX512-NEXT: movq %rdi, 16(%r9)
+; AVX512-NEXT: movq %r15, 24(%r9)
+; AVX512-NEXT: movq %r14, (%r9)
+; AVX512-NEXT: movq %rbx, 8(%r9)
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: addq $184, %rsp
+; AVX512-NEXT: popq %rbx
+; AVX512-NEXT: popq %r12
+; AVX512-NEXT: popq %r13
+; AVX512-NEXT: popq %r14
+; AVX512-NEXT: popq %r15
+; AVX512-NEXT: popq %rbp
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+ %rem = and i32 %position, 511
+ %ofs = zext nneg i32 %rem to i512
+ %bit = shl nuw i512 1, %ofs
+ %mask = xor i512 %bit, -1
+ %val0 = zext i1 %value to i512
+ %val = shl nuw i512 %val0, %ofs
+ %ld = load i512, ptr %word
+ %test = and i512 %ld, %bit
+ %res0 = and i512 %ld, %mask
+ %res = or i512 %res0, %val
+ %cmp = icmp eq i512 %test, 0
+ store i512 %res, ptr %word
+ ret i1 %cmp
+}
+
+; i4096
+
+define i1 @test_ne_i4096(ptr %word, i32 %position) nounwind {
+; X86-LABEL: test_ne_i4096:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $1792, %esp # imm = 0x700
+; X86-NEXT: movl 12(%ebp), %ebx
+; X86-NEXT: movl %ebx, %ecx
+; X86-NEXT: shrl $3, %ecx
+; X86-NEXT: andl $508, %ecx # imm = 0x1FC
+; X86-NEXT: leal {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: subl %ecx, %esi
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 248(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 252(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl $31, %ebx
+; X86-NEXT: movl %ebx, %ecx
+; X86-NEXT: shldl %cl, %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 504(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 508(%esi), %edx
+; X86-NEXT: shldl %cl, %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 120(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 124(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 376(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 380(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 184(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 188(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 440(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 444(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 56(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 60(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 312(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 316(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 216(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 220(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 472(%esi), %edi
+; X86-NEXT: movl 476(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 88(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 92(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 344(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 348(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 152(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 156(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 408(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 412(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 24(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 28(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 280(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 284(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 232(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 236(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 488(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 492(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 104(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 108(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 360(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 364(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 168(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 172(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 424(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 428(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 40(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 44(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 296(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 300(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 200(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 204(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 456(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 460(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 72(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 76(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 328(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 332(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 136(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 140(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 392(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 396(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 8(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 12(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 264(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 268(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 240(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 244(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 496(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 500(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 112(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 116(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 368(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 372(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 176(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 180(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 432(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 436(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 48(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 52(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 304(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 308(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 208(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 212(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 464(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 468(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 80(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 84(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 336(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 340(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 144(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 148(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 400(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 404(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 16(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 20(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 272(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 276(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 224(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 228(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 480(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 484(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 96(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 100(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 352(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 356(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 160(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 164(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 416(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 420(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 32(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 36(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 288(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 292(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 192(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 196(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 448(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 452(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 64(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 68(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 320(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 324(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 128(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 132(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edi, %edx
+; X86-NEXT: movl 256(%esi), %edi
+; X86-NEXT: movl 260(%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: shldl %cl, %edi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl 388(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl 4(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shrdl $1, %eax, %edi
+; X86-NEXT: shrl %eax
+; X86-NEXT: movl %ebx, %edx
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: notb %cl
+; X86-NEXT: shrdl %cl, %eax, %edi
+; X86-NEXT: shrl %cl, %ebx
+; X86-NEXT: movb $32, %cl
+; X86-NEXT: testb %cl, %cl
+; X86-NEXT: movl (%esi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: jne .LBB20_2
+; X86-NEXT: # %bb.1:
+; X86-NEXT: movl %edi, %ebx
+; X86-NEXT: .LBB20_2:
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: shll %cl, %edx
+; X86-NEXT: orl %ebx, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: andl 320(%eax), %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: andl 64(%eax), %esi
+; X86-NEXT: orl %ecx, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: andl 448(%eax), %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: andl 192(%eax), %ecx
+; X86-NEXT: orl %edx, %ecx
+; X86-NEXT: orl %esi, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: andl 288(%eax), %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: andl 32(%eax), %edx
+; X86-NEXT: orl %ecx, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: andl 416(%eax), %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: andl 160(%eax), %edi
+; X86-NEXT: orl %ecx, %edi
+; X86-NEXT: orl %edx, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: andl 352(%eax), %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: andl 96(%eax), %esi
+; X86-NEXT: orl %ecx, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: andl 480(%eax), %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: andl 224(%eax), %ecx
+; X86-NEXT: orl %edx, %ecx
+; X86-NEXT: orl %esi, %ecx
+; X86-NEXT: orl %edi, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: andl 272(%eax), %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: andl 16(%eax), %edx
+; X86-NEXT: orl %ecx, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: andl 400(%eax), %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: andl 144(%eax), %esi
+; X86-NEXT: orl %ecx, %esi
+; X86-NEXT: orl %edx, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: andl 336(%eax), %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: andl 80(%eax), %edx
+; X86-NEXT: orl %ecx, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: andl 464(%eax), %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: andl 208(%eax), %edi
+; X86-NEXT: orl %ecx, %edi
+; X86-NEXT: orl %edx, %edi
+; X86-NEXT: orl %esi, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: andl 304(%eax), %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: andl 48(%eax), %edx
+; X86-NEXT: orl %ecx, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: andl 432(%eax), %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: andl 176(%eax), %esi
+; X86-NEXT: orl %ecx, %esi
+; X86-NEXT: orl %edx, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: andl 368(%eax), %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: andl 112(%eax), %edx
+; X86-NEXT: orl %ecx, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: andl 496(%eax), %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: andl 240(%eax), %ebx
+; X86-NEXT: orl %ecx, %ebx
+; X86-NEXT: orl %edx, %ebx
+; X86-NEXT: orl %esi, %ebx
+; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: andl 264(%eax), %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: andl 8(%eax), %edx
+; X86-NEXT: orl %ecx, %edx
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 392(%ebx), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: andl 136(%ebx), %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: orl %edx, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: andl 328(%ebx), %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 72(%ebx), %eax
+; X86-NEXT: orl %edx, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: andl 456(%ebx), %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: andl 200(%ebx), %esi
+; X86-NEXT: orl %edi, %esi
+; X86-NEXT: orl %eax, %esi
+; X86-NEXT: orl %ecx, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: andl 296(%ebx), %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 40(%ebx), %eax
+; X86-NEXT: orl %edi, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: andl 424(%ebx), %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: andl 168(%ebx), %edx
+; X86-NEXT: orl %edi, %edx
+; X86-NEXT: orl %eax, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: andl 360(%ebx), %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 104(%ebx), %eax
+; X86-NEXT: orl %ecx, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: andl 488(%ebx), %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: andl 232(%ebx), %edi
+; X86-NEXT: orl %ecx, %edi
+; X86-NEXT: orl %eax, %edi
+; X86-NEXT: orl %edx, %edi
+; X86-NEXT: orl %esi, %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 280(%ebx), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: andl 24(%ebx), %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 408(%ebx), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: andl 152(%ebx), %edx
+; X86-NEXT: orl %eax, %edx
+; X86-NEXT: orl %ecx, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 344(%ebx), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: andl 88(%ebx), %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 472(%ebx), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: andl 216(%ebx), %esi
+; X86-NEXT: orl %eax, %esi
+; X86-NEXT: orl %ecx, %esi
+; X86-NEXT: orl %edx, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 312(%ebx), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: andl 56(%ebx), %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 440(%ebx), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: andl 184(%ebx), %edx
+; X86-NEXT: orl %eax, %edx
+; X86-NEXT: orl %ecx, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 376(%ebx), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: andl 120(%ebx), %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 504(%ebx), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: andl 248(%ebx), %edi
+; X86-NEXT: orl %eax, %edi
+; X86-NEXT: orl %ecx, %edi
+; X86-NEXT: orl %edx, %edi
+; X86-NEXT: orl %esi, %edi
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 324(%ebx), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: andl 68(%ebx), %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 452(%ebx), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: andl 196(%ebx), %edx
+; X86-NEXT: orl %eax, %edx
+; X86-NEXT: orl %ecx, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 292(%ebx), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: andl 36(%ebx), %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 420(%ebx), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: andl 164(%ebx), %edx
+; X86-NEXT: orl %eax, %edx
+; X86-NEXT: orl %ecx, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 356(%ebx), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: andl 100(%ebx), %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 484(%ebx), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: andl 228(%ebx), %esi
+; X86-NEXT: orl %eax, %esi
+; X86-NEXT: orl %ecx, %esi
+; X86-NEXT: orl %edx, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 276(%ebx), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: andl 20(%ebx), %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 404(%ebx), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: andl 148(%ebx), %edx
+; X86-NEXT: orl %eax, %edx
+; X86-NEXT: orl %ecx, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 340(%ebx), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: andl 84(%ebx), %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 468(%ebx), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: andl 212(%ebx), %esi
+; X86-NEXT: orl %eax, %esi
+; X86-NEXT: orl %ecx, %esi
+; X86-NEXT: orl %edx, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 308(%ebx), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: andl 52(%ebx), %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 436(%ebx), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: andl 180(%ebx), %edx
+; X86-NEXT: orl %eax, %edx
+; X86-NEXT: orl %ecx, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 372(%ebx), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: andl 116(%ebx), %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 500(%ebx), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: andl 244(%ebx), %edi
+; X86-NEXT: orl %eax, %edi
+; X86-NEXT: orl %ecx, %edi
+; X86-NEXT: orl %edx, %edi
+; X86-NEXT: orl %esi, %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 268(%ebx), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: andl 12(%ebx), %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 396(%ebx), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: andl 140(%ebx), %edx
+; X86-NEXT: orl %eax, %edx
+; X86-NEXT: orl %ecx, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 332(%ebx), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: andl 76(%ebx), %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 460(%ebx), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: andl 204(%ebx), %edi
+; X86-NEXT: orl %eax, %edi
+; X86-NEXT: orl %ecx, %edi
+; X86-NEXT: orl %edx, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 300(%ebx), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: andl 44(%ebx), %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 428(%ebx), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: andl 172(%ebx), %edx
+; X86-NEXT: orl %eax, %edx
+; X86-NEXT: orl %ecx, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 364(%ebx), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: andl 108(%ebx), %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 492(%ebx), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: andl 236(%ebx), %esi
+; X86-NEXT: orl %eax, %esi
+; X86-NEXT: orl %ecx, %esi
+; X86-NEXT: orl %edx, %esi
+; X86-NEXT: orl %edi, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 284(%ebx), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: andl 28(%ebx), %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 412(%ebx), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: andl 156(%ebx), %edi
+; X86-NEXT: orl %eax, %edi
+; X86-NEXT: orl %ecx, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 348(%ebx), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: andl 92(%ebx), %edx
+; X86-NEXT: orl %eax, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: andl 476(%ebx), %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 220(%ebx), %eax
+; X86-NEXT: orl %ecx, %eax
+; X86-NEXT: orl %edx, %eax
+; X86-NEXT: orl %edi, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: andl 316(%ebx), %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: andl 60(%ebx), %edx
+; X86-NEXT: orl %ecx, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: andl 444(%ebx), %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: andl 188(%ebx), %edi
+; X86-NEXT: orl %ecx, %edi
+; X86-NEXT: orl %edx, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: andl 380(%ebx), %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: andl 124(%ebx), %edx
+; X86-NEXT: orl %ecx, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: andl 508(%ebx), %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: movl 8(%ebp), %esi
+; X86-NEXT: andl 252(%esi), %ebx
+; X86-NEXT: orl %ecx, %ebx
+; X86-NEXT: orl %edx, %ebx
+; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: orl %eax, %ebx
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: negl %ecx
+; X86-NEXT: movl 1648(%esp,%ecx), %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: shldl %cl, %edi, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: shldl %cl, %edx, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl 8(%ebp), %edx
+; X86-NEXT: andl 128(%edx), %ecx
+; X86-NEXT: andl 384(%edx), %edi
+; X86-NEXT: orl %ecx, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NEXT: shll %cl, %eax
+; X86-NEXT: andl (%edx), %eax
+; X86-NEXT: orl %eax, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 256(%edx), %eax
+; X86-NEXT: orl %eax, %edi
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 260(%edx), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: andl 4(%edx), %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: andl 132(%edx), %eax
+; X86-NEXT: andl 388(%edx), %esi
+; X86-NEXT: orl %eax, %esi
+; X86-NEXT: orl %ecx, %esi
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: orl %ebx, %esi
+; X86-NEXT: orl %edi, %esi
+; X86-NEXT: setne %al
+; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; SSE-LABEL: test_ne_i4096:
+; SSE: # %bb.0:
+; SSE-NEXT: pushq %rbp
+; SSE-NEXT: pushq %r15
+; SSE-NEXT: pushq %r14
+; SSE-NEXT: pushq %r13
+; SSE-NEXT: pushq %r12
+; SSE-NEXT: pushq %rbx
+; SSE-NEXT: subq $1576, %rsp # imm = 0x628
+; SSE-NEXT: movl %esi, %ecx
+; SSE-NEXT: movl %esi, %eax
+; SSE-NEXT: andl $4032, %eax # imm = 0xFC0
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movups %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movq $0, {{[0-9]+}}(%rsp)
+; SSE-NEXT: movq $1, {{[0-9]+}}(%rsp)
+; SSE-NEXT: andl $63, %ecx
+; SSE-NEXT: shrl $3, %eax
+; SSE-NEXT: negl %eax
+; SSE-NEXT: movslq %eax, %rsi
+; SSE-NEXT: movq 1296(%rsp,%rsi), %rdx
+; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1304(%rsp,%rsi), %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: shldq %cl, %rdx, %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1552(%rsp,%rsi), %rdx
+; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1560(%rsp,%rsi), %rax
+; SSE-NEXT: shldq %cl, %rdx, %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1168(%rsp,%rsi), %rdx
+; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1176(%rsp,%rsi), %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: shldq %cl, %rdx, %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1424(%rsp,%rsi), %rdx
+; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1432(%rsp,%rsi), %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: shldq %cl, %rdx, %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1232(%rsp,%rsi), %rdx
+; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1240(%rsp,%rsi), %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: shldq %cl, %rdx, %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1488(%rsp,%rsi), %rdx
+; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1496(%rsp,%rsi), %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: shldq %cl, %rdx, %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1104(%rsp,%rsi), %rdx
+; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1112(%rsp,%rsi), %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: shldq %cl, %rdx, %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1360(%rsp,%rsi), %rdx
+; SSE-NEXT: movq %rdx, (%rsp) # 8-byte Spill
+; SSE-NEXT: movq 1368(%rsp,%rsi), %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: shldq %cl, %rdx, %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1264(%rsp,%rsi), %rdx
+; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1272(%rsp,%rsi), %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: shldq %cl, %rdx, %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1520(%rsp,%rsi), %rdx
+; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1528(%rsp,%rsi), %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: shldq %cl, %rdx, %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1136(%rsp,%rsi), %rdx
+; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1144(%rsp,%rsi), %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: shldq %cl, %rdx, %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1392(%rsp,%rsi), %rdx
+; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1400(%rsp,%rsi), %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: shldq %cl, %rdx, %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1200(%rsp,%rsi), %rdx
+; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1208(%rsp,%rsi), %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: shldq %cl, %rdx, %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1456(%rsp,%rsi), %rdx
+; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1464(%rsp,%rsi), %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: shldq %cl, %rdx, %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1072(%rsp,%rsi), %rdx
+; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1080(%rsp,%rsi), %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: shldq %cl, %rdx, %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1328(%rsp,%rsi), %rdx
+; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1336(%rsp,%rsi), %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: shldq %cl, %rdx, %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1280(%rsp,%rsi), %rdx
+; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1288(%rsp,%rsi), %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: shldq %cl, %rdx, %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1536(%rsp,%rsi), %rdx
+; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1544(%rsp,%rsi), %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: shldq %cl, %rdx, %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1152(%rsp,%rsi), %rdx
+; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1160(%rsp,%rsi), %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: shldq %cl, %rdx, %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1408(%rsp,%rsi), %rdx
+; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1416(%rsp,%rsi), %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: shldq %cl, %rdx, %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1216(%rsp,%rsi), %r11
+; SSE-NEXT: movq 1224(%rsp,%rsi), %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: shldq %cl, %r11, %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1472(%rsp,%rsi), %rdx
+; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1480(%rsp,%rsi), %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: shldq %cl, %rdx, %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1088(%rsp,%rsi), %r9
+; SSE-NEXT: movq 1096(%rsp,%rsi), %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: shldq %cl, %r9, %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1344(%rsp,%rsi), %rdx
+; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1352(%rsp,%rsi), %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: shldq %cl, %rdx, %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1248(%rsp,%rsi), %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1256(%rsp,%rsi), %rdx
+; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: shldq %cl, %rax, %rdx
+; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1504(%rsp,%rsi), %rdx
+; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1512(%rsp,%rsi), %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: shldq %cl, %rdx, %rax
+; SSE-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1120(%rsp,%rsi), %rax
+; SSE-NEXT: movq 1128(%rsp,%rsi), %r8
+; SSE-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: shldq %cl, %rax, %r8
+; SSE-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1376(%rsp,%rsi), %r13
+; SSE-NEXT: movq 1384(%rsp,%rsi), %rbx
+; SSE-NEXT: movq %rbx, %r8
+; SSE-NEXT: shldq %cl, %r13, %r8
+; SSE-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1184(%rsp,%rsi), %rdx
+; SSE-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1192(%rsp,%rsi), %r15
+; SSE-NEXT: movq %r15, %r14
+; SSE-NEXT: shldq %cl, %rdx, %r14
+; SSE-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1440(%rsp,%rsi), %r10
+; SSE-NEXT: movq 1448(%rsp,%rsi), %rdx
+; SSE-NEXT: movq %rdx, %r14
+; SSE-NEXT: shldq %cl, %r10, %r14
+; SSE-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1312(%rsp,%rsi), %r14
+; SSE-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq 1320(%rsp,%rsi), %rbp
+; SSE-NEXT: movq %rbp, %r12
+; SSE-NEXT: shldq %cl, %r14, %r12
+; SSE-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; SSE-NEXT: shldq %cl, %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; SSE-NEXT: shldq %cl, %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; SSE-NEXT: shldq %cl, %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; SSE-NEXT: shldq %cl, %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; SSE-NEXT: shldq %cl, %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; SSE-NEXT: shldq %cl, %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; SSE-NEXT: shldq %cl, %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; SSE-NEXT: shldq %cl, %r8, (%rsp) # 8-byte Folded Spill
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; SSE-NEXT: shldq %cl, %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; SSE-NEXT: shldq %cl, %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; SSE-NEXT: shldq %cl, %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; SSE-NEXT: shldq %cl, %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; SSE-NEXT: shldq %cl, %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; SSE-NEXT: shldq %cl, %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; SSE-NEXT: movq 1064(%rsp,%rsi), %rbx
+; SSE-NEXT: shldq %cl, %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; SSE-NEXT: shldq %cl, %rbp, %r14
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; SSE-NEXT: shldq %cl, %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; SSE-NEXT: shldq %cl, %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; SSE-NEXT: shldq %cl, %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; SSE-NEXT: shldq %cl, %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; SSE-NEXT: shldq %cl, %rdx, %r11
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; SSE-NEXT: shldq %cl, %r15, %rdx
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; SSE-NEXT: shldq %cl, %r15, %r9
+; SSE-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; SSE-NEXT: shldq %cl, %r15, %r8
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; SSE-NEXT: shldq %cl, %r15, %rbp
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; SSE-NEXT: shldq %cl, %r15, %r9
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; SSE-NEXT: shldq %cl, %r15, %rax
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; SSE-NEXT: shldq %cl, %r15, %r13
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; SSE-NEXT: shldq %cl, %r12, %r15
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; SSE-NEXT: shldq %cl, %r12, %r10
+; SSE-NEXT: andq 384(%rdi), %r10
+; SSE-NEXT: andq 128(%rdi), %r15
+; SSE-NEXT: andq 320(%rdi), %r13
+; SSE-NEXT: andq 64(%rdi), %rax
+; SSE-NEXT: orq %r10, %r15
+; SSE-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: orq %r13, %rax
+; SSE-NEXT: andq 448(%rdi), %r9
+; SSE-NEXT: andq 192(%rdi), %rbp
+; SSE-NEXT: orq %r9, %rbp
+; SSE-NEXT: orq %rax, %rbp
+; SSE-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: andq 288(%rdi), %r8
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; SSE-NEXT: andq 32(%rdi), %r9
+; SSE-NEXT: andq 416(%rdi), %rdx
+; SSE-NEXT: andq 160(%rdi), %r11
+; SSE-NEXT: orq %r8, %r9
+; SSE-NEXT: orq %rdx, %r11
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; SSE-NEXT: andq 352(%rdi), %rdx
+; SSE-NEXT: orq %r9, %r11
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; SSE-NEXT: andq 96(%rdi), %rax
+; SSE-NEXT: orq %rdx, %rax
+; SSE-NEXT: movq %rax, %rdx
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; SSE-NEXT: andq 480(%rdi), %rax
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; SSE-NEXT: andq 224(%rdi), %r8
+; SSE-NEXT: orq %rax, %r8
+; SSE-NEXT: orq %rdx, %r8
+; SSE-NEXT: andq 272(%rdi), %r14
+; SSE-NEXT: orq %r11, %r8
+; SSE-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; SSE-NEXT: andq 16(%rdi), %rax
+; SSE-NEXT: orq %r14, %rax
+; SSE-NEXT: movq %rax, %r8
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; SSE-NEXT: andq 400(%rdi), %rdx
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; SSE-NEXT: andq 144(%rdi), %rax
+; SSE-NEXT: orq %rdx, %rax
+; SSE-NEXT: orq %r8, %rax
+; SSE-NEXT: movq %rax, %r8
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; SSE-NEXT: andq 336(%rdi), %r9
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; SSE-NEXT: andq 80(%rdi), %rax
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; SSE-NEXT: andq 464(%rdi), %rdx
+; SSE-NEXT: orq %r9, %rax
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; SSE-NEXT: andq 208(%rdi), %r11
+; SSE-NEXT: orq %rdx, %r11
+; SSE-NEXT: orq %rax, %r11
+; SSE-NEXT: orq %r8, %r11
+; SSE-NEXT: movq (%rsp), %rdx # 8-byte Reload
+; SSE-NEXT: andq 304(%rdi), %rdx
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; SSE-NEXT: andq 48(%rdi), %rax
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; SSE-NEXT: andq 432(%rdi), %r9
+; SSE-NEXT: orq %rdx, %rax
+; SSE-NEXT: movq %rax, %r10
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; SSE-NEXT: andq 176(%rdi), %r8
+; SSE-NEXT: orq %r9, %r8
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; SSE-NEXT: andq 368(%rdi), %r9
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; SSE-NEXT: andq 112(%rdi), %rax
+; SSE-NEXT: orq %r10, %r8
+; SSE-NEXT: movq %r8, %r10
+; SSE-NEXT: orq %r9, %rax
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; SSE-NEXT: andq 496(%rdi), %r8
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; SSE-NEXT: andq 240(%rdi), %rbp
+; SSE-NEXT: orq %r8, %rbp
+; SSE-NEXT: orq %rax, %rbp
+; SSE-NEXT: orq %r10, %rbp
+; SSE-NEXT: orq %r11, %rbp
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; SSE-NEXT: andq 392(%rdi), %rax
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; SSE-NEXT: andq 136(%rdi), %r12
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; SSE-NEXT: andq 328(%rdi), %rdx
+; SSE-NEXT: orq %rax, %r12
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; SSE-NEXT: andq 72(%rdi), %rax
+; SSE-NEXT: orq %rdx, %rax
+; SSE-NEXT: movq %rax, %rdx
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; SSE-NEXT: andq 456(%rdi), %rax
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; SSE-NEXT: andq 200(%rdi), %r13
+; SSE-NEXT: orq %rax, %r13
+; SSE-NEXT: orq %rdx, %r13
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; SSE-NEXT: andq 296(%rdi), %rdx
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; SSE-NEXT: andq 40(%rdi), %rax
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; SSE-NEXT: andq 424(%rdi), %r8
+; SSE-NEXT: orq %rdx, %rax
+; SSE-NEXT: movq %rax, %r9
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; SSE-NEXT: andq 168(%rdi), %rdx
+; SSE-NEXT: orq %r8, %rdx
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; SSE-NEXT: andq 360(%rdi), %r8
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; SSE-NEXT: andq 104(%rdi), %rax
+; SSE-NEXT: orq %r9, %rdx
+; SSE-NEXT: orq %r8, %rax
+; SSE-NEXT: movq %rax, %r8
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; SSE-NEXT: andq 488(%rdi), %rax
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; SSE-NEXT: andq 232(%rdi), %r15
+; SSE-NEXT: orq %rax, %r15
+; SSE-NEXT: orq %r8, %r15
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; SSE-NEXT: andq 280(%rdi), %r8
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; SSE-NEXT: andq 24(%rdi), %rax
+; SSE-NEXT: orq %rdx, %r15
+; SSE-NEXT: orq %r8, %rax
+; SSE-NEXT: movq %rax, %r10
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; SSE-NEXT: andq 408(%rdi), %r8
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; SSE-NEXT: andq 152(%rdi), %rax
+; SSE-NEXT: orq %r8, %rax
+; SSE-NEXT: orq %r10, %rax
+; SSE-NEXT: movq %rax, %r10
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; SSE-NEXT: andq 344(%rdi), %r11
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; SSE-NEXT: andq 88(%rdi), %r8
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; SSE-NEXT: andq 472(%rdi), %rax
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; SSE-NEXT: andq 216(%rdi), %r14
+; SSE-NEXT: orq %r11, %r8
+; SSE-NEXT: orq %rax, %r14
+; SSE-NEXT: orq %r8, %r14
+; SSE-NEXT: orq %r10, %r14
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; SSE-NEXT: andq 312(%rdi), %r11
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; SSE-NEXT: andq 56(%rdi), %r10
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; SSE-NEXT: andq 440(%rdi), %r8
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; SSE-NEXT: andq 184(%rdi), %r9
+; SSE-NEXT: orq %r11, %r10
+; SSE-NEXT: orq %r8, %r9
+; SSE-NEXT: orq %r10, %r9
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; SSE-NEXT: shldq %cl, %rax, %rdx
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; SSE-NEXT: andq 376(%rdi), %r10
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; SSE-NEXT: andq 120(%rdi), %rax
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; SSE-NEXT: andq 504(%rdi), %r11
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; SSE-NEXT: andq 248(%rdi), %r8
+; SSE-NEXT: orq %r10, %rax
+; SSE-NEXT: movq %rax, %r10
+; SSE-NEXT: orq %r11, %r8
+; SSE-NEXT: movq 1056(%rsp,%rsi), %rax
+; SSE-NEXT: shldq %cl, %rax, %rbx
+; SSE-NEXT: # kill: def $cl killed $cl killed $ecx
+; SSE-NEXT: shlq %cl, %rax
+; SSE-NEXT: orq %r10, %r8
+; SSE-NEXT: orq %r9, %r8
+; SSE-NEXT: andq 256(%rdi), %rdx
+; SSE-NEXT: orq %r14, %r8
+; SSE-NEXT: andq (%rdi), %rax
+; SSE-NEXT: orq %rdx, %rax
+; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; SSE-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; SSE-NEXT: orq %rbp, %rax
+; SSE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; SSE-NEXT: andq 264(%rdi), %rcx
+; SSE-NEXT: andq 8(%rdi), %rbx
+; SSE-NEXT: orq %rcx, %rbx
+; SSE-NEXT: orq %r12, %rbx
+; SSE-NEXT: orq %r13, %rbx
+; SSE-NEXT: orq %r15, %rbx
+; SSE-NEXT: orq %r8, %rbx
+; SSE-NEXT: orq %rax, %rbx
+; SSE-NEXT: setne %al
+; SSE-NEXT: addq $1576, %rsp # imm = 0x628
+; SSE-NEXT: popq %rbx
+; SSE-NEXT: popq %r12
+; SSE-NEXT: popq %r13
+; SSE-NEXT: popq %r14
+; SSE-NEXT: popq %r15
+; SSE-NEXT: popq %rbp
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: test_ne_i4096:
+; AVX2: # %bb.0:
+; AVX2-NEXT: pushq %rbp
+; AVX2-NEXT: pushq %r15
+; AVX2-NEXT: pushq %r14
+; AVX2-NEXT: pushq %r13
+; AVX2-NEXT: pushq %r12
+; AVX2-NEXT: pushq %rbx
+; AVX2-NEXT: subq $1560, %rsp # imm = 0x618
+; AVX2-NEXT: movl %esi, %ecx
+; AVX2-NEXT: movl %esi, %eax
+; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: andl $4032, %eax # imm = 0xFC0
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: vmovss {{.*#+}} xmm0 = [1,0,0,0]
+; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: andl $63, %ecx
+; AVX2-NEXT: shrl $3, %eax
+; AVX2-NEXT: negl %eax
+; AVX2-NEXT: movslq %eax, %rsi
+; AVX2-NEXT: movq 1280(%rsp,%rsi), %rdx
+; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1288(%rsp,%rsi), %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: shldq %cl, %rdx, %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1536(%rsp,%rsi), %rdx
+; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1544(%rsp,%rsi), %rax
+; AVX2-NEXT: shldq %cl, %rdx, %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1152(%rsp,%rsi), %rdx
+; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1160(%rsp,%rsi), %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: shldq %cl, %rdx, %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1408(%rsp,%rsi), %rdx
+; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1416(%rsp,%rsi), %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: shldq %cl, %rdx, %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1216(%rsp,%rsi), %rdx
+; AVX2-NEXT: movq %rdx, (%rsp) # 8-byte Spill
+; AVX2-NEXT: movq 1224(%rsp,%rsi), %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: shldq %cl, %rdx, %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1472(%rsp,%rsi), %rdx
+; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1480(%rsp,%rsi), %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: shldq %cl, %rdx, %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1088(%rsp,%rsi), %rdx
+; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1096(%rsp,%rsi), %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: shldq %cl, %rdx, %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1344(%rsp,%rsi), %rdx
+; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1352(%rsp,%rsi), %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: shldq %cl, %rdx, %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1248(%rsp,%rsi), %rdx
+; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1256(%rsp,%rsi), %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: shldq %cl, %rdx, %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1504(%rsp,%rsi), %rdx
+; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1512(%rsp,%rsi), %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: shldq %cl, %rdx, %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1120(%rsp,%rsi), %rdx
+; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1128(%rsp,%rsi), %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: shldq %cl, %rdx, %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1376(%rsp,%rsi), %rdx
+; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1384(%rsp,%rsi), %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: shldq %cl, %rdx, %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1184(%rsp,%rsi), %rdx
+; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1192(%rsp,%rsi), %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: shldq %cl, %rdx, %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1440(%rsp,%rsi), %rdx
+; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1448(%rsp,%rsi), %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: shldq %cl, %rdx, %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1056(%rsp,%rsi), %rdx
+; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1064(%rsp,%rsi), %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: shldq %cl, %rdx, %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1312(%rsp,%rsi), %rdx
+; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1320(%rsp,%rsi), %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: shldq %cl, %rdx, %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1264(%rsp,%rsi), %rdx
+; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1272(%rsp,%rsi), %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: shldq %cl, %rdx, %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1520(%rsp,%rsi), %rdx
+; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1528(%rsp,%rsi), %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: shldq %cl, %rdx, %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1136(%rsp,%rsi), %rdx
+; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1144(%rsp,%rsi), %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: shldq %cl, %rdx, %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1392(%rsp,%rsi), %rdx
+; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1400(%rsp,%rsi), %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: shldq %cl, %rdx, %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1200(%rsp,%rsi), %r11
+; AVX2-NEXT: movq 1208(%rsp,%rsi), %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: shldq %cl, %r11, %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1456(%rsp,%rsi), %rdx
+; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1464(%rsp,%rsi), %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: shldq %cl, %rdx, %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1072(%rsp,%rsi), %r12
+; AVX2-NEXT: movq 1080(%rsp,%rsi), %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: shldq %cl, %r12, %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1328(%rsp,%rsi), %rdx
+; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1336(%rsp,%rsi), %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: shldq %cl, %rdx, %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1232(%rsp,%rsi), %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1240(%rsp,%rsi), %rdx
+; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: shldq %cl, %rax, %rdx
+; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1488(%rsp,%rsi), %rbp
+; AVX2-NEXT: movq 1496(%rsp,%rsi), %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: shldq %cl, %rbp, %rax
+; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1104(%rsp,%rsi), %rax
+; AVX2-NEXT: movq 1112(%rsp,%rsi), %rdx
+; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: shldq %cl, %rax, %rdx
+; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1360(%rsp,%rsi), %r10
+; AVX2-NEXT: movq 1368(%rsp,%rsi), %r8
+; AVX2-NEXT: movq %r8, %rdx
+; AVX2-NEXT: shldq %cl, %r10, %rdx
+; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1168(%rsp,%rsi), %r9
+; AVX2-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1176(%rsp,%rsi), %rbx
+; AVX2-NEXT: movq %rbx, %rdx
+; AVX2-NEXT: shldq %cl, %r9, %rdx
+; AVX2-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1424(%rsp,%rsi), %r9
+; AVX2-NEXT: movq 1432(%rsp,%rsi), %rdx
+; AVX2-NEXT: movq %rdx, %r14
+; AVX2-NEXT: shldq %cl, %r9, %r14
+; AVX2-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1296(%rsp,%rsi), %r15
+; AVX2-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq 1304(%rsp,%rsi), %r14
+; AVX2-NEXT: movq %r14, %r13
+; AVX2-NEXT: shldq %cl, %r15, %r13
+; AVX2-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; AVX2-NEXT: shldq %cl, %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; AVX2-NEXT: shldq %cl, %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; AVX2-NEXT: shldq %cl, %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; AVX2-NEXT: shldq %cl, %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; AVX2-NEXT: shldq %cl, %r15, (%rsp) # 8-byte Folded Spill
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; AVX2-NEXT: shldq %cl, %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; AVX2-NEXT: shldq %cl, %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; AVX2-NEXT: shldq %cl, %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; AVX2-NEXT: shldq %cl, %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; AVX2-NEXT: shldq %cl, %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; AVX2-NEXT: shldq %cl, %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX2-NEXT: shldq %cl, %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX2-NEXT: shldq %cl, %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX2-NEXT: shldq %cl, %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX2-NEXT: movq 1048(%rsp,%rsi), %rdx
+; AVX2-NEXT: shldq %cl, %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX2-NEXT: shldq %cl, %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; AVX2-NEXT: shldq %cl, %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; AVX2-NEXT: shldq %cl, %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; AVX2-NEXT: shldq %cl, %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; AVX2-NEXT: shldq %cl, %r14, %rbx
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; AVX2-NEXT: shldq %cl, %r14, %r11
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; AVX2-NEXT: shldq %cl, %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; AVX2-NEXT: shldq %cl, %r14, %r12
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; AVX2-NEXT: shldq %cl, %r14, %r8
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; AVX2-NEXT: shldq %cl, %r14, %r13
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; AVX2-NEXT: shldq %cl, %r14, %rbp
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; AVX2-NEXT: shldq %cl, %r14, %rax
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; AVX2-NEXT: shldq %cl, %r14, %r10
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; AVX2-NEXT: shldq %cl, %r15, %r14
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; AVX2-NEXT: shldq %cl, %r15, %r9
+; AVX2-NEXT: andq 384(%rdi), %r9
+; AVX2-NEXT: andq 128(%rdi), %r14
+; AVX2-NEXT: andq 320(%rdi), %r10
+; AVX2-NEXT: orq %r9, %r14
+; AVX2-NEXT: movq %r14, %r15
+; AVX2-NEXT: andq 64(%rdi), %rax
+; AVX2-NEXT: orq %r10, %rax
+; AVX2-NEXT: andq 448(%rdi), %rbp
+; AVX2-NEXT: andq 192(%rdi), %r13
+; AVX2-NEXT: orq %rbp, %r13
+; AVX2-NEXT: orq %rax, %r13
+; AVX2-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX2-NEXT: andq 288(%rdi), %r8
+; AVX2-NEXT: andq 32(%rdi), %r12
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX2-NEXT: andq 416(%rdi), %rax
+; AVX2-NEXT: orq %r8, %r12
+; AVX2-NEXT: andq 160(%rdi), %r11
+; AVX2-NEXT: orq %rax, %r11
+; AVX2-NEXT: andq 352(%rdi), %rbx
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX2-NEXT: andq 96(%rdi), %rax
+; AVX2-NEXT: orq %r12, %r11
+; AVX2-NEXT: orq %rbx, %rax
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; AVX2-NEXT: andq 480(%rdi), %r10
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; AVX2-NEXT: andq 224(%rdi), %r13
+; AVX2-NEXT: orq %r10, %r13
+; AVX2-NEXT: orq %rax, %r13
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; AVX2-NEXT: andq 272(%rdi), %r8
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX2-NEXT: andq 16(%rdi), %rax
+; AVX2-NEXT: orq %r11, %r13
+; AVX2-NEXT: orq %r8, %rax
+; AVX2-NEXT: movq %rax, %r8
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; AVX2-NEXT: andq 400(%rdi), %r9
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX2-NEXT: andq 144(%rdi), %rax
+; AVX2-NEXT: orq %r9, %rax
+; AVX2-NEXT: orq %r8, %rax
+; AVX2-NEXT: movq %rax, %r9
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; AVX2-NEXT: andq 336(%rdi), %r10
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX2-NEXT: andq 80(%rdi), %rax
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; AVX2-NEXT: andq 464(%rdi), %r8
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; AVX2-NEXT: andq 208(%rdi), %r11
+; AVX2-NEXT: orq %r10, %rax
+; AVX2-NEXT: orq %r8, %r11
+; AVX2-NEXT: orq %rax, %r11
+; AVX2-NEXT: orq %r9, %r11
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; AVX2-NEXT: andq 304(%rdi), %r9
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; AVX2-NEXT: andq 48(%rdi), %r8
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; AVX2-NEXT: andq 432(%rdi), %r10
+; AVX2-NEXT: movq (%rsp), %rax # 8-byte Reload
+; AVX2-NEXT: andq 176(%rdi), %rax
+; AVX2-NEXT: orq %r9, %r8
+; AVX2-NEXT: movq %r8, %r9
+; AVX2-NEXT: orq %r10, %rax
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; AVX2-NEXT: andq 368(%rdi), %r8
+; AVX2-NEXT: orq %r9, %rax
+; AVX2-NEXT: movq %rax, %r10
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX2-NEXT: andq 112(%rdi), %rax
+; AVX2-NEXT: orq %r8, %rax
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; AVX2-NEXT: andq 496(%rdi), %r8
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; AVX2-NEXT: andq 240(%rdi), %r9
+; AVX2-NEXT: orq %r8, %r9
+; AVX2-NEXT: orq %rax, %r9
+; AVX2-NEXT: orq %r10, %r9
+; AVX2-NEXT: orq %r11, %r9
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; AVX2-NEXT: andq 392(%rdi), %r10
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; AVX2-NEXT: andq 136(%rdi), %rbp
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; AVX2-NEXT: andq 328(%rdi), %r8
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX2-NEXT: andq 72(%rdi), %rax
+; AVX2-NEXT: orq %r10, %rbp
+; AVX2-NEXT: orq %r8, %rax
+; AVX2-NEXT: movq %rax, %r8
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX2-NEXT: andq 456(%rdi), %rax
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; AVX2-NEXT: andq 200(%rdi), %r12
+; AVX2-NEXT: orq %rax, %r12
+; AVX2-NEXT: orq %r8, %r12
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; AVX2-NEXT: andq 296(%rdi), %r10
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; AVX2-NEXT: andq 40(%rdi), %r8
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; AVX2-NEXT: andq 424(%rdi), %r11
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX2-NEXT: andq 168(%rdi), %rax
+; AVX2-NEXT: orq %r10, %r8
+; AVX2-NEXT: movq %r8, %r10
+; AVX2-NEXT: orq %r11, %rax
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; AVX2-NEXT: andq 360(%rdi), %r8
+; AVX2-NEXT: orq %r10, %rax
+; AVX2-NEXT: movq %rax, %r10
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX2-NEXT: andq 104(%rdi), %rax
+; AVX2-NEXT: orq %r8, %rax
+; AVX2-NEXT: movq %rax, %r8
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX2-NEXT: andq 488(%rdi), %rax
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; AVX2-NEXT: andq 232(%rdi), %r14
+; AVX2-NEXT: orq %rax, %r14
+; AVX2-NEXT: orq %r8, %r14
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; AVX2-NEXT: andq 280(%rdi), %r8
+; AVX2-NEXT: orq %r10, %r14
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX2-NEXT: andq 24(%rdi), %rax
+; AVX2-NEXT: orq %r8, %rax
+; AVX2-NEXT: movq %rax, %r10
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; AVX2-NEXT: andq 408(%rdi), %r8
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX2-NEXT: andq 152(%rdi), %rax
+; AVX2-NEXT: orq %r8, %rax
+; AVX2-NEXT: orq %r10, %rax
+; AVX2-NEXT: movq %rax, %r10
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; AVX2-NEXT: andq 344(%rdi), %r11
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; AVX2-NEXT: andq 88(%rdi), %r8
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX2-NEXT: andq 472(%rdi), %rax
+; AVX2-NEXT: orq %r11, %r8
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; AVX2-NEXT: andq 216(%rdi), %rbx
+; AVX2-NEXT: orq %rax, %rbx
+; AVX2-NEXT: orq %r8, %rbx
+; AVX2-NEXT: orq %r10, %rbx
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; AVX2-NEXT: andq 312(%rdi), %r8
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX2-NEXT: andq 56(%rdi), %rax
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; AVX2-NEXT: andq 440(%rdi), %r10
+; AVX2-NEXT: orq %r8, %rax
+; AVX2-NEXT: movq %rax, %r11
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; AVX2-NEXT: andq 184(%rdi), %r8
+; AVX2-NEXT: orq %r10, %r8
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; AVX2-NEXT: andq 376(%rdi), %r10
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX2-NEXT: andq 120(%rdi), %rax
+; AVX2-NEXT: orq %r11, %r8
+; AVX2-NEXT: movq %r8, %r11
+; AVX2-NEXT: orq %r10, %rax
+; AVX2-NEXT: movq %rax, %r10
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; AVX2-NEXT: andq 504(%rdi), %r8
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX2-NEXT: andq 248(%rdi), %rax
+; AVX2-NEXT: orq %r8, %rax
+; AVX2-NEXT: orq %r10, %rax
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; AVX2-NEXT: shldq %cl, %r8, %r10
+; AVX2-NEXT: orq %r11, %rax
+; AVX2-NEXT: movq 1040(%rsp,%rsi), %rsi
+; AVX2-NEXT: orq %rbx, %rax
+; AVX2-NEXT: movq %rax, %r8
+; AVX2-NEXT: shlxq %rcx, %rsi, %rax
+; AVX2-NEXT: andq 256(%rdi), %r10
+; AVX2-NEXT: andq (%rdi), %rax
+; AVX2-NEXT: orq %r10, %rax
+; AVX2-NEXT: orq %r15, %rax
+; AVX2-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
+; AVX2-NEXT: orq %r13, %rax
+; AVX2-NEXT: # kill: def $cl killed $cl killed $rcx
+; AVX2-NEXT: shldq %cl, %rsi, %rdx
+; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; AVX2-NEXT: andq 264(%rdi), %rcx
+; AVX2-NEXT: andq 8(%rdi), %rdx
+; AVX2-NEXT: orq %r9, %rax
+; AVX2-NEXT: orq %rcx, %rdx
+; AVX2-NEXT: orq %rbp, %rdx
+; AVX2-NEXT: orq %r12, %rdx
+; AVX2-NEXT: orq %r14, %rdx
+; AVX2-NEXT: orq %r8, %rdx
+; AVX2-NEXT: orq %rax, %rdx
+; AVX2-NEXT: setne %al
+; AVX2-NEXT: addq $1560, %rsp # imm = 0x618
+; AVX2-NEXT: popq %rbx
+; AVX2-NEXT: popq %r12
+; AVX2-NEXT: popq %r13
+; AVX2-NEXT: popq %r14
+; AVX2-NEXT: popq %r15
+; AVX2-NEXT: popq %rbp
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test_ne_i4096:
+; AVX512: # %bb.0:
+; AVX512-NEXT: pushq %rbp
+; AVX512-NEXT: pushq %r15
+; AVX512-NEXT: pushq %r14
+; AVX512-NEXT: pushq %r13
+; AVX512-NEXT: pushq %r12
+; AVX512-NEXT: pushq %rbx
+; AVX512-NEXT: subq $1560, %rsp # imm = 0x618
+; AVX512-NEXT: movl %esi, %ecx
+; AVX512-NEXT: movl %esi, %eax
+; AVX512-NEXT: andl $4032, %eax # imm = 0xFC0
+; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [1,0,0,0]
+; AVX512-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: andl $63, %ecx
+; AVX512-NEXT: shrl $3, %eax
+; AVX512-NEXT: negl %eax
+; AVX512-NEXT: movslq %eax, %rsi
+; AVX512-NEXT: movq 1280(%rsp,%rsi), %rdx
+; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1288(%rsp,%rsi), %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: shldq %cl, %rdx, %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1536(%rsp,%rsi), %rdx
+; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1544(%rsp,%rsi), %rax
+; AVX512-NEXT: shldq %cl, %rdx, %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1152(%rsp,%rsi), %rdx
+; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1160(%rsp,%rsi), %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: shldq %cl, %rdx, %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1408(%rsp,%rsi), %rdx
+; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1416(%rsp,%rsi), %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: shldq %cl, %rdx, %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1216(%rsp,%rsi), %rdx
+; AVX512-NEXT: movq %rdx, (%rsp) # 8-byte Spill
+; AVX512-NEXT: movq 1224(%rsp,%rsi), %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: shldq %cl, %rdx, %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1472(%rsp,%rsi), %rdx
+; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1480(%rsp,%rsi), %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: shldq %cl, %rdx, %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1088(%rsp,%rsi), %rdx
+; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1096(%rsp,%rsi), %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: shldq %cl, %rdx, %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1344(%rsp,%rsi), %rdx
+; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1352(%rsp,%rsi), %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: shldq %cl, %rdx, %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1248(%rsp,%rsi), %rdx
+; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1256(%rsp,%rsi), %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: shldq %cl, %rdx, %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1504(%rsp,%rsi), %rdx
+; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1512(%rsp,%rsi), %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: shldq %cl, %rdx, %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1120(%rsp,%rsi), %rdx
+; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1128(%rsp,%rsi), %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: shldq %cl, %rdx, %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1376(%rsp,%rsi), %rdx
+; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1384(%rsp,%rsi), %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: shldq %cl, %rdx, %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1184(%rsp,%rsi), %rdx
+; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1192(%rsp,%rsi), %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: shldq %cl, %rdx, %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1440(%rsp,%rsi), %rdx
+; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1448(%rsp,%rsi), %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: shldq %cl, %rdx, %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1056(%rsp,%rsi), %rdx
+; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1064(%rsp,%rsi), %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: shldq %cl, %rdx, %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1312(%rsp,%rsi), %rdx
+; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1320(%rsp,%rsi), %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: shldq %cl, %rdx, %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1264(%rsp,%rsi), %rdx
+; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1272(%rsp,%rsi), %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: shldq %cl, %rdx, %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1520(%rsp,%rsi), %rdx
+; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1528(%rsp,%rsi), %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: shldq %cl, %rdx, %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1136(%rsp,%rsi), %rdx
+; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1144(%rsp,%rsi), %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: shldq %cl, %rdx, %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1392(%rsp,%rsi), %rdx
+; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1400(%rsp,%rsi), %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: shldq %cl, %rdx, %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1200(%rsp,%rsi), %r10
+; AVX512-NEXT: movq 1208(%rsp,%rsi), %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: shldq %cl, %r10, %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1456(%rsp,%rsi), %rdx
+; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1464(%rsp,%rsi), %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: shldq %cl, %rdx, %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1072(%rsp,%rsi), %r14
+; AVX512-NEXT: movq 1080(%rsp,%rsi), %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: shldq %cl, %r14, %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1328(%rsp,%rsi), %rdx
+; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1336(%rsp,%rsi), %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: shldq %cl, %rdx, %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1232(%rsp,%rsi), %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1240(%rsp,%rsi), %rdx
+; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: shldq %cl, %rax, %rdx
+; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1488(%rsp,%rsi), %r12
+; AVX512-NEXT: movq 1496(%rsp,%rsi), %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: shldq %cl, %r12, %rax
+; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1104(%rsp,%rsi), %rax
+; AVX512-NEXT: movq 1112(%rsp,%rsi), %rdx
+; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: shldq %cl, %rax, %rdx
+; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1360(%rsp,%rsi), %r11
+; AVX512-NEXT: movq 1368(%rsp,%rsi), %rbx
+; AVX512-NEXT: movq %rbx, %rdx
+; AVX512-NEXT: shldq %cl, %r11, %rdx
+; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1168(%rsp,%rsi), %r9
+; AVX512-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1176(%rsp,%rsi), %r8
+; AVX512-NEXT: movq %r8, %rdx
+; AVX512-NEXT: shldq %cl, %r9, %rdx
+; AVX512-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1424(%rsp,%rsi), %r9
+; AVX512-NEXT: movq 1432(%rsp,%rsi), %rdx
+; AVX512-NEXT: movq %rdx, %r15
+; AVX512-NEXT: shldq %cl, %r9, %r15
+; AVX512-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1296(%rsp,%rsi), %rbp
+; AVX512-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq 1304(%rsp,%rsi), %r15
+; AVX512-NEXT: movq %r15, %r13
+; AVX512-NEXT: shldq %cl, %rbp, %r13
+; AVX512-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; AVX512-NEXT: shldq %cl, %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; AVX512-NEXT: shldq %cl, %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; AVX512-NEXT: shldq %cl, %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; AVX512-NEXT: shldq %cl, %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; AVX512-NEXT: shldq %cl, %r13, (%rsp) # 8-byte Folded Spill
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; AVX512-NEXT: shldq %cl, %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; AVX512-NEXT: shldq %cl, %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; AVX512-NEXT: shldq %cl, %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; AVX512-NEXT: shldq %cl, %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; AVX512-NEXT: shldq %cl, %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; AVX512-NEXT: shldq %cl, %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX512-NEXT: shldq %cl, %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX512-NEXT: shldq %cl, %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX512-NEXT: shldq %cl, %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX512-NEXT: movq 1048(%rsp,%rsi), %rdx
+; AVX512-NEXT: shldq %cl, %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX512-NEXT: shldq %cl, %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; AVX512-NEXT: shldq %cl, %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; AVX512-NEXT: shldq %cl, %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; AVX512-NEXT: shldq %cl, %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; AVX512-NEXT: shldq %cl, %r15, %rbx
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; AVX512-NEXT: shldq %cl, %r15, %r10
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; AVX512-NEXT: shldq %cl, %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; AVX512-NEXT: shldq %cl, %r15, %r14
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; AVX512-NEXT: shldq %cl, %r15, %r8
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; AVX512-NEXT: shldq %cl, %r15, %r13
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; AVX512-NEXT: shldq %cl, %r15, %r12
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; AVX512-NEXT: shldq %cl, %r15, %rax
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; AVX512-NEXT: shldq %cl, %r15, %r11
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; AVX512-NEXT: shldq %cl, %rbp, %r15
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; AVX512-NEXT: shldq %cl, %rbp, %r9
+; AVX512-NEXT: andq 384(%rdi), %r9
+; AVX512-NEXT: andq 128(%rdi), %r15
+; AVX512-NEXT: orq %r9, %r15
+; AVX512-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX512-NEXT: andq 320(%rdi), %r11
+; AVX512-NEXT: andq 64(%rdi), %rax
+; AVX512-NEXT: orq %r11, %rax
+; AVX512-NEXT: andq 448(%rdi), %r12
+; AVX512-NEXT: andq 192(%rdi), %r13
+; AVX512-NEXT: orq %r12, %r13
+; AVX512-NEXT: orq %rax, %r13
+; AVX512-NEXT: andq 288(%rdi), %r8
+; AVX512-NEXT: andq 32(%rdi), %r14
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX512-NEXT: andq 416(%rdi), %rax
+; AVX512-NEXT: orq %r8, %r14
+; AVX512-NEXT: andq 160(%rdi), %r10
+; AVX512-NEXT: orq %rax, %r10
+; AVX512-NEXT: andq 352(%rdi), %rbx
+; AVX512-NEXT: orq %r14, %r10
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX512-NEXT: andq 96(%rdi), %rax
+; AVX512-NEXT: orq %rbx, %rax
+; AVX512-NEXT: movq %rax, %r8
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX512-NEXT: andq 480(%rdi), %rax
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
+; AVX512-NEXT: andq 224(%rdi), %r15
+; AVX512-NEXT: orq %rax, %r15
+; AVX512-NEXT: orq %r8, %r15
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; AVX512-NEXT: andq 272(%rdi), %r8
+; AVX512-NEXT: orq %r10, %r15
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX512-NEXT: andq 16(%rdi), %rax
+; AVX512-NEXT: orq %r8, %rax
+; AVX512-NEXT: movq %rax, %r8
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; AVX512-NEXT: andq 400(%rdi), %r9
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX512-NEXT: andq 144(%rdi), %rax
+; AVX512-NEXT: orq %r9, %rax
+; AVX512-NEXT: orq %r8, %rax
+; AVX512-NEXT: movq %rax, %r9
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; AVX512-NEXT: andq 336(%rdi), %r10
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX512-NEXT: andq 80(%rdi), %rax
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; AVX512-NEXT: andq 464(%rdi), %r8
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; AVX512-NEXT: andq 208(%rdi), %r11
+; AVX512-NEXT: orq %r10, %rax
+; AVX512-NEXT: orq %r8, %r11
+; AVX512-NEXT: orq %rax, %r11
+; AVX512-NEXT: orq %r9, %r11
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; AVX512-NEXT: andq 304(%rdi), %r10
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX512-NEXT: andq 48(%rdi), %rax
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; AVX512-NEXT: andq 432(%rdi), %r9
+; AVX512-NEXT: movq (%rsp), %r8 # 8-byte Reload
+; AVX512-NEXT: andq 176(%rdi), %r8
+; AVX512-NEXT: orq %r10, %rax
+; AVX512-NEXT: movq %rax, %r10
+; AVX512-NEXT: orq %r9, %r8
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; AVX512-NEXT: andq 368(%rdi), %r9
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX512-NEXT: andq 112(%rdi), %rax
+; AVX512-NEXT: orq %r10, %r8
+; AVX512-NEXT: movq %r8, %r10
+; AVX512-NEXT: orq %r9, %rax
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; AVX512-NEXT: andq 496(%rdi), %r8
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; AVX512-NEXT: andq 240(%rdi), %r9
+; AVX512-NEXT: orq %r8, %r9
+; AVX512-NEXT: orq %rax, %r9
+; AVX512-NEXT: orq %r10, %r9
+; AVX512-NEXT: orq %r11, %r9
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; AVX512-NEXT: andq 392(%rdi), %r10
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
+; AVX512-NEXT: andq 136(%rdi), %rbp
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; AVX512-NEXT: andq 328(%rdi), %r8
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX512-NEXT: andq 72(%rdi), %rax
+; AVX512-NEXT: orq %r10, %rbp
+; AVX512-NEXT: orq %r8, %rax
+; AVX512-NEXT: movq %rax, %r8
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX512-NEXT: andq 456(%rdi), %rax
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; AVX512-NEXT: andq 200(%rdi), %r12
+; AVX512-NEXT: orq %rax, %r12
+; AVX512-NEXT: orq %r8, %r12
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; AVX512-NEXT: andq 296(%rdi), %r8
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX512-NEXT: andq 40(%rdi), %rax
+; AVX512-NEXT: orq %r8, %rax
+; AVX512-NEXT: movq %rax, %r10
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; AVX512-NEXT: andq 424(%rdi), %r8
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX512-NEXT: andq 168(%rdi), %rax
+; AVX512-NEXT: orq %r8, %rax
+; AVX512-NEXT: orq %r10, %rax
+; AVX512-NEXT: movq %rax, %r10
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; AVX512-NEXT: andq 360(%rdi), %r8
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX512-NEXT: andq 104(%rdi), %rax
+; AVX512-NEXT: orq %r8, %rax
+; AVX512-NEXT: movq %rax, %r8
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX512-NEXT: andq 488(%rdi), %rax
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
+; AVX512-NEXT: andq 232(%rdi), %r14
+; AVX512-NEXT: orq %rax, %r14
+; AVX512-NEXT: orq %r8, %r14
+; AVX512-NEXT: orq %r10, %r14
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; AVX512-NEXT: andq 280(%rdi), %r8
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX512-NEXT: andq 24(%rdi), %rax
+; AVX512-NEXT: orq %r8, %rax
+; AVX512-NEXT: movq %rax, %r10
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; AVX512-NEXT: andq 408(%rdi), %r8
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX512-NEXT: andq 152(%rdi), %rax
+; AVX512-NEXT: orq %r8, %rax
+; AVX512-NEXT: orq %r10, %rax
+; AVX512-NEXT: movq %rax, %r10
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; AVX512-NEXT: andq 344(%rdi), %r11
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; AVX512-NEXT: andq 88(%rdi), %r8
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX512-NEXT: andq 472(%rdi), %rax
+; AVX512-NEXT: orq %r11, %r8
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
+; AVX512-NEXT: andq 216(%rdi), %rbx
+; AVX512-NEXT: orq %rax, %rbx
+; AVX512-NEXT: orq %r8, %rbx
+; AVX512-NEXT: orq %r10, %rbx
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; AVX512-NEXT: andq 312(%rdi), %r10
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX512-NEXT: andq 56(%rdi), %rax
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; AVX512-NEXT: andq 440(%rdi), %r8
+; AVX512-NEXT: orq %r10, %rax
+; AVX512-NEXT: movq %rax, %r10
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX512-NEXT: andq 184(%rdi), %rax
+; AVX512-NEXT: orq %r8, %rax
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; AVX512-NEXT: andq 376(%rdi), %r8
+; AVX512-NEXT: orq %r10, %rax
+; AVX512-NEXT: movq %rax, %r11
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX512-NEXT: andq 120(%rdi), %rax
+; AVX512-NEXT: orq %r8, %rax
+; AVX512-NEXT: movq %rax, %r10
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX512-NEXT: andq 504(%rdi), %rax
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; AVX512-NEXT: andq 248(%rdi), %r8
+; AVX512-NEXT: orq %rax, %r8
+; AVX512-NEXT: orq %r10, %r8
+; AVX512-NEXT: orq %r11, %r8
+; AVX512-NEXT: movq 1040(%rsp,%rsi), %rax
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; AVX512-NEXT: shldq %cl, %rsi, %r10
+; AVX512-NEXT: orq %rbx, %r8
+; AVX512-NEXT: shlxq %rcx, %rax, %rsi
+; AVX512-NEXT: andq 256(%rdi), %r10
+; AVX512-NEXT: andq (%rdi), %rsi
+; AVX512-NEXT: orq %r10, %rsi
+; AVX512-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
+; AVX512-NEXT: orq %r13, %rsi
+; AVX512-NEXT: orq %r15, %rsi
+; AVX512-NEXT: # kill: def $cl killed $cl killed $rcx
+; AVX512-NEXT: shldq %cl, %rax, %rdx
+; AVX512-NEXT: orq %r9, %rsi
+; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; AVX512-NEXT: andq 264(%rdi), %rax
+; AVX512-NEXT: andq 8(%rdi), %rdx
+; AVX512-NEXT: orq %rax, %rdx
+; AVX512-NEXT: orq %rbp, %rdx
+; AVX512-NEXT: orq %r12, %rdx
+; AVX512-NEXT: orq %r14, %rdx
+; AVX512-NEXT: orq %r8, %rdx
+; AVX512-NEXT: orq %rsi, %rdx
+; AVX512-NEXT: setne %al
+; AVX512-NEXT: addq $1560, %rsp # imm = 0x618
+; AVX512-NEXT: popq %rbx
+; AVX512-NEXT: popq %r12
+; AVX512-NEXT: popq %r13
+; AVX512-NEXT: popq %r14
+; AVX512-NEXT: popq %r15
+; AVX512-NEXT: popq %rbp
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+ %rem = and i32 %position, 4095
+ %ofs = zext nneg i32 %rem to i4096
+ %bit = shl nuw i4096 1, %ofs
+ %ld = load i4096, ptr %word
+ %test = and i4096 %ld, %bit
+ %cmp = icmp ne i4096 %test, 0
+ ret i1 %cmp
+}
diff --git a/llvm/test/CodeGen/X86/trunc-srl-load.ll b/llvm/test/CodeGen/X86/trunc-srl-load.ll
index 4dae143..d9c21d3 100644
--- a/llvm/test/CodeGen/X86/trunc-srl-load.ll
+++ b/llvm/test/CodeGen/X86/trunc-srl-load.ll
@@ -1,9 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=X86
-; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64 | FileCheck %s --check-prefixes=X64,SSE
-; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=X64,SSE
-; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=X64,AVX,AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=X64,AVX,AVX512
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64 | FileCheck %s --check-prefixes=X64
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=X64
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=X64
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=X64
; Tests showing for the analysis of non-constant shift amounts to improve load address math
@@ -12,42 +12,20 @@
define i16 @extractSub64_16(ptr %word, i32 %idx) nounwind {
; X86-LABEL: extractSub64_16:
; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movb {{[0-9]+}}(%esp), %ch
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl (%eax), %edx
-; X86-NEXT: movl 4(%eax), %esi
-; X86-NEXT: movb %ch, %cl
-; X86-NEXT: andb $16, %cl
-; X86-NEXT: movl %esi, %eax
-; X86-NEXT: shrl %cl, %eax
-; X86-NEXT: shrdl %cl, %esi, %edx
-; X86-NEXT: testb $32, %ch
-; X86-NEXT: jne .LBB0_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %edx, %eax
-; X86-NEXT: .LBB0_2:
-; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: popl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andl $48, %ecx
+; X86-NEXT: shrl $3, %ecx
+; X86-NEXT: movzwl (%eax,%ecx), %eax
; X86-NEXT: retl
;
-; SSE-LABEL: extractSub64_16:
-; SSE: # %bb.0:
-; SSE-NEXT: movl %esi, %ecx
-; SSE-NEXT: movq (%rdi), %rax
-; SSE-NEXT: andb $48, %cl
-; SSE-NEXT: # kill: def $cl killed $cl killed $ecx
-; SSE-NEXT: shrq %cl, %rax
-; SSE-NEXT: # kill: def $ax killed $ax killed $rax
-; SSE-NEXT: retq
-;
-; AVX-LABEL: extractSub64_16:
-; AVX: # %bb.0:
-; AVX-NEXT: # kill: def $esi killed $esi def $rsi
-; AVX-NEXT: andb $48, %sil
-; AVX-NEXT: shrxq %rsi, (%rdi), %rax
-; AVX-NEXT: # kill: def $ax killed $ax killed $rax
-; AVX-NEXT: retq
+; X64-LABEL: extractSub64_16:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: andl $48, %esi
+; X64-NEXT: shrl $3, %esi
+; X64-NEXT: movzwl (%rdi,%rsi), %eax
+; X64-NEXT: retq
%idx_bounds = and i32 %idx, 63
%idx_align = and i32 %idx_bounds, -16
%sh = zext nneg i32 %idx_align to i64
@@ -60,67 +38,20 @@ define i16 @extractSub64_16(ptr %word, i32 %idx) nounwind {
define i16 @extractSub128_16(ptr %word, i32 %idx) nounwind {
; X86-LABEL: extractSub128_16:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
-; X86-NEXT: movl %esp, %ebp
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: andl $-16, %esp
-; X86-NEXT: subl $32, %esp
-; X86-NEXT: movzbl 12(%ebp), %eax
-; X86-NEXT: movl 8(%ebp), %ecx
-; X86-NEXT: movl (%ecx), %edx
-; X86-NEXT: movl 4(%ecx), %esi
-; X86-NEXT: movl 8(%ecx), %edi
-; X86-NEXT: movl 12(%ecx), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edx, (%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andb $16, %cl
-; X86-NEXT: shrb $3, %al
-; X86-NEXT: andb $12, %al
-; X86-NEXT: movzbl %al, %edx
-; X86-NEXT: movl (%esp,%edx), %eax
-; X86-NEXT: movl 4(%esp,%edx), %edx
-; X86-NEXT: shrdl %cl, %edx, %eax
-; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: leal -8(%ebp), %esp
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andl $112, %ecx
+; X86-NEXT: shrl $3, %ecx
+; X86-NEXT: movzwl (%eax,%ecx), %eax
; X86-NEXT: retl
;
-; SSE-LABEL: extractSub128_16:
-; SSE: # %bb.0:
-; SSE-NEXT: movq (%rdi), %rax
-; SSE-NEXT: movq 8(%rdi), %rdx
-; SSE-NEXT: movl %esi, %ecx
-; SSE-NEXT: andb $48, %cl
-; SSE-NEXT: movq %rdx, %rdi
-; SSE-NEXT: shrq %cl, %rdi
-; SSE-NEXT: shrdq %cl, %rdx, %rax
-; SSE-NEXT: testb $64, %sil
-; SSE-NEXT: cmovneq %rdi, %rax
-; SSE-NEXT: # kill: def $ax killed $ax killed $rax
-; SSE-NEXT: retq
-;
-; AVX-LABEL: extractSub128_16:
-; AVX: # %bb.0:
-; AVX-NEXT: movq (%rdi), %rdx
-; AVX-NEXT: movq 8(%rdi), %rax
-; AVX-NEXT: movl %esi, %ecx
-; AVX-NEXT: andb $48, %cl
-; AVX-NEXT: shrdq %cl, %rax, %rdx
-; AVX-NEXT: shrxq %rcx, %rax, %rax
-; AVX-NEXT: testb $64, %sil
-; AVX-NEXT: cmoveq %rdx, %rax
-; AVX-NEXT: # kill: def $ax killed $ax killed $rax
-; AVX-NEXT: retq
+; X64-LABEL: extractSub128_16:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: andl $112, %esi
+; X64-NEXT: shrl $3, %esi
+; X64-NEXT: movzwl (%rdi,%rsi), %eax
+; X64-NEXT: retq
%idx_bounds = and i32 %idx, 127
%idx_align = and i32 %idx_bounds, -16
%sh = zext nneg i32 %idx_align to i128
@@ -133,62 +64,20 @@ define i16 @extractSub128_16(ptr %word, i32 %idx) nounwind {
define i32 @extractSub128_32(ptr %word, i32 %idx) nounwind {
; X86-LABEL: extractSub128_32:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
-; X86-NEXT: movl %esp, %ebp
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: andl $-16, %esp
-; X86-NEXT: subl $32, %esp
-; X86-NEXT: movzbl 12(%ebp), %eax
-; X86-NEXT: movl 8(%ebp), %ecx
-; X86-NEXT: movl (%ecx), %edx
-; X86-NEXT: movl 4(%ecx), %esi
-; X86-NEXT: movl 8(%ecx), %edi
-; X86-NEXT: movl 12(%ecx), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edx, (%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: andb $96, %al
-; X86-NEXT: shrb $3, %al
-; X86-NEXT: movzbl %al, %eax
-; X86-NEXT: movl (%esp,%eax), %eax
-; X86-NEXT: leal -8(%ebp), %esp
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andl $96, %ecx
+; X86-NEXT: shrl $3, %ecx
+; X86-NEXT: movl (%eax,%ecx), %eax
; X86-NEXT: retl
;
-; SSE-LABEL: extractSub128_32:
-; SSE: # %bb.0:
-; SSE-NEXT: movq (%rdi), %rax
-; SSE-NEXT: movq 8(%rdi), %rdx
-; SSE-NEXT: movl %esi, %ecx
-; SSE-NEXT: andb $32, %cl
-; SSE-NEXT: movq %rdx, %rdi
-; SSE-NEXT: shrq %cl, %rdi
-; SSE-NEXT: shrdq %cl, %rdx, %rax
-; SSE-NEXT: testb $64, %sil
-; SSE-NEXT: cmovneq %rdi, %rax
-; SSE-NEXT: # kill: def $eax killed $eax killed $rax
-; SSE-NEXT: retq
-;
-; AVX-LABEL: extractSub128_32:
-; AVX: # %bb.0:
-; AVX-NEXT: movq (%rdi), %rdx
-; AVX-NEXT: movq 8(%rdi), %rax
-; AVX-NEXT: movl %esi, %ecx
-; AVX-NEXT: andb $32, %cl
-; AVX-NEXT: shrdq %cl, %rax, %rdx
-; AVX-NEXT: shrxq %rcx, %rax, %rax
-; AVX-NEXT: testb $64, %sil
-; AVX-NEXT: cmoveq %rdx, %rax
-; AVX-NEXT: # kill: def $eax killed $eax killed $rax
-; AVX-NEXT: retq
+; X64-LABEL: extractSub128_32:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: andl $96, %esi
+; X64-NEXT: shrl $3, %esi
+; X64-NEXT: movl (%rdi,%rsi), %eax
+; X64-NEXT: retq
%idx_bounds = and i32 %idx, 127
%idx_align = and i32 %idx_bounds, -32
%sh = zext nneg i32 %idx_align to i128
@@ -201,46 +90,20 @@ define i32 @extractSub128_32(ptr %word, i32 %idx) nounwind {
define i64 @extractSub128_64(ptr %word, i32 %idx) nounwind {
; X86-LABEL: extractSub128_64:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
-; X86-NEXT: movl %esp, %ebp
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: andl $-16, %esp
-; X86-NEXT: subl $32, %esp
-; X86-NEXT: movzbl 12(%ebp), %eax
-; X86-NEXT: movl 8(%ebp), %ecx
-; X86-NEXT: movl (%ecx), %edx
-; X86-NEXT: movl 4(%ecx), %esi
-; X86-NEXT: movl 8(%ecx), %edi
-; X86-NEXT: movl 12(%ecx), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edx, (%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: andb $64, %al
-; X86-NEXT: shrb $3, %al
-; X86-NEXT: movzbl %al, %ecx
-; X86-NEXT: movl (%esp,%ecx), %eax
-; X86-NEXT: movl 4(%esp,%ecx), %edx
-; X86-NEXT: leal -8(%ebp), %esp
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: andl $64, %edx
+; X86-NEXT: shrl $3, %edx
+; X86-NEXT: movl (%ecx,%edx), %eax
+; X86-NEXT: movl 4(%ecx,%edx), %edx
; X86-NEXT: retl
;
; X64-LABEL: extractSub128_64:
; X64: # %bb.0:
-; X64-NEXT: testb $64, %sil
-; X64-NEXT: je .LBB3_1
-; X64-NEXT: # %bb.2:
-; X64-NEXT: movq 8(%rdi), %rax
-; X64-NEXT: retq
-; X64-NEXT: .LBB3_1:
-; X64-NEXT: movq (%rdi), %rax
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: andl $64, %esi
+; X64-NEXT: shrl $3, %esi
+; X64-NEXT: movq (%rdi,%rsi), %rax
; X64-NEXT: retq
%idx_bounds = and i32 %idx, 127
%idx_align = and i32 %idx_bounds, -64
@@ -254,185 +117,20 @@ define i64 @extractSub128_64(ptr %word, i32 %idx) nounwind {
define i8 @extractSub512_8(ptr %word, i32 %idx) nounwind {
; X86-LABEL: extractSub512_8:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
-; X86-NEXT: movl %esp, %ebp
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: andl $-16, %esp
-; X86-NEXT: subl $192, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl (%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 4(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 8(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 12(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 16(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 20(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 24(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 28(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 32(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 36(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 40(%eax), %ebx
-; X86-NEXT: movl 44(%eax), %edi
-; X86-NEXT: movl 48(%eax), %esi
-; X86-NEXT: movl 52(%eax), %edx
-; X86-NEXT: movl 56(%eax), %ecx
-; X86-NEXT: movl 60(%eax), %eax
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 12(%ebp), %edx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: andl $24, %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: shrl $3, %edx
-; X86-NEXT: andl $60, %edx
-; X86-NEXT: movl 48(%esp,%edx), %eax
-; X86-NEXT: movl 52(%esp,%edx), %edx
-; X86-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-NEXT: shrdl %cl, %edx, %eax
-; X86-NEXT: # kill: def $al killed $al killed $eax
-; X86-NEXT: leal -12(%ebp), %esp
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
-; X86-NEXT: popl %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: shrl $3, %ecx
+; X86-NEXT: andl $63, %ecx
+; X86-NEXT: movzbl (%eax,%ecx), %eax
; X86-NEXT: retl
;
-; SSE-LABEL: extractSub512_8:
-; SSE: # %bb.0:
-; SSE-NEXT: pushq %rax
-; SSE-NEXT: # kill: def $esi killed $esi def $rsi
-; SSE-NEXT: movups (%rdi), %xmm0
-; SSE-NEXT: movups 16(%rdi), %xmm1
-; SSE-NEXT: movups 32(%rdi), %xmm2
-; SSE-NEXT: movups 48(%rdi), %xmm3
-; SSE-NEXT: xorps %xmm4, %xmm4
-; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm3, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movl %esi, %ecx
-; SSE-NEXT: andl $56, %ecx
-; SSE-NEXT: shrl $3, %esi
-; SSE-NEXT: andl $56, %esi
-; SSE-NEXT: movq -128(%rsp,%rsi), %rdx
-; SSE-NEXT: shrq %cl, %rdx
-; SSE-NEXT: movl -120(%rsp,%rsi), %eax
-; SSE-NEXT: addl %eax, %eax
-; SSE-NEXT: notl %ecx
-; SSE-NEXT: # kill: def $cl killed $cl killed $ecx
-; SSE-NEXT: shlq %cl, %rax
-; SSE-NEXT: orl %edx, %eax
-; SSE-NEXT: # kill: def $al killed $al killed $rax
-; SSE-NEXT: popq %rcx
-; SSE-NEXT: retq
-;
-; AVX2-LABEL: extractSub512_8:
-; AVX2: # %bb.0:
-; AVX2-NEXT: pushq %rax
-; AVX2-NEXT: # kill: def $esi killed $esi def $rsi
-; AVX2-NEXT: vmovups (%rdi), %ymm0
-; AVX2-NEXT: vmovups 32(%rdi), %ymm1
-; AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: movl %esi, %ecx
-; AVX2-NEXT: andl $56, %ecx
-; AVX2-NEXT: shrl $3, %esi
-; AVX2-NEXT: andl $56, %esi
-; AVX2-NEXT: shrxq %rcx, -128(%rsp,%rsi), %rax
-; AVX2-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
-; AVX2-NEXT: notl %ecx
-; AVX2-NEXT: movl -120(%rsp,%rsi), %edx
-; AVX2-NEXT: addl %edx, %edx
-; AVX2-NEXT: shlxq %rcx, %rdx, %rcx
-; AVX2-NEXT: orl %ecx, %eax
-; AVX2-NEXT: # kill: def $al killed $al killed $rax
-; AVX2-NEXT: popq %rcx
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: extractSub512_8:
-; AVX512: # %bb.0:
-; AVX512-NEXT: pushq %rax
-; AVX512-NEXT: vmovups (%rdi), %ymm0
-; AVX512-NEXT: vmovups 32(%rdi), %ymm1
-; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; AVX512-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT: # kill: def $esi killed $esi def $rsi
-; AVX512-NEXT: movl %esi, %ecx
-; AVX512-NEXT: andl $56, %ecx
-; AVX512-NEXT: shrl $3, %esi
-; AVX512-NEXT: andl $56, %esi
-; AVX512-NEXT: shrxq %rcx, -128(%rsp,%rsi), %rax
-; AVX512-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
-; AVX512-NEXT: notl %ecx
-; AVX512-NEXT: movl -120(%rsp,%rsi), %edx
-; AVX512-NEXT: addl %edx, %edx
-; AVX512-NEXT: shlxq %rcx, %rdx, %rcx
-; AVX512-NEXT: orl %ecx, %eax
-; AVX512-NEXT: # kill: def $al killed $al killed $rax
-; AVX512-NEXT: popq %rcx
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; X64-LABEL: extractSub512_8:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: shrl $3, %esi
+; X64-NEXT: andl $63, %esi
+; X64-NEXT: movzbl (%rdi,%rsi), %eax
+; X64-NEXT: retq
%idx_bounds = and i32 %idx, 511
%idx_align = and i32 %idx_bounds, -8
%ld = load i512, ptr %word, align 8
@@ -445,152 +143,21 @@ define i8 @extractSub512_8(ptr %word, i32 %idx) nounwind {
define i64 @extractSub512_64(ptr %word, i32 %idx) nounwind {
; X86-LABEL: extractSub512_64:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
-; X86-NEXT: movl %esp, %ebp
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: andl $-16, %esp
-; X86-NEXT: subl $192, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl (%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 4(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 8(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 12(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 16(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 20(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 24(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 28(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 32(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 36(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 40(%eax), %ebx
-; X86-NEXT: movl 44(%eax), %edi
-; X86-NEXT: movl 48(%eax), %esi
-; X86-NEXT: movl 52(%eax), %edx
-; X86-NEXT: movl 56(%eax), %ecx
-; X86-NEXT: movl 60(%eax), %eax
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: shrl $3, %ecx
-; X86-NEXT: andl $56, %ecx
-; X86-NEXT: movl 48(%esp,%ecx), %eax
-; X86-NEXT: movl 52(%esp,%ecx), %edx
-; X86-NEXT: leal -12(%ebp), %esp
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
-; X86-NEXT: popl %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: shrl $3, %edx
+; X86-NEXT: andl $56, %edx
+; X86-NEXT: movl (%ecx,%edx), %eax
+; X86-NEXT: movl 4(%ecx,%edx), %edx
; X86-NEXT: retl
;
-; SSE-LABEL: extractSub512_64:
-; SSE: # %bb.0:
-; SSE-NEXT: pushq %rax
-; SSE-NEXT: # kill: def $esi killed $esi def $rsi
-; SSE-NEXT: movups (%rdi), %xmm0
-; SSE-NEXT: movups 16(%rdi), %xmm1
-; SSE-NEXT: movups 32(%rdi), %xmm2
-; SSE-NEXT: movups 48(%rdi), %xmm3
-; SSE-NEXT: xorps %xmm4, %xmm4
-; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm3, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: shrl $3, %esi
-; SSE-NEXT: andl $56, %esi
-; SSE-NEXT: movq -128(%rsp,%rsi), %rax
-; SSE-NEXT: popq %rcx
-; SSE-NEXT: retq
-;
-; AVX2-LABEL: extractSub512_64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: pushq %rax
-; AVX2-NEXT: vmovups (%rdi), %ymm0
-; AVX2-NEXT: vmovups 32(%rdi), %ymm1
-; AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: # kill: def $esi killed $esi def $rsi
-; AVX2-NEXT: shrl $3, %esi
-; AVX2-NEXT: andl $56, %esi
-; AVX2-NEXT: movq -128(%rsp,%rsi), %rax
-; AVX2-NEXT: popq %rcx
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: extractSub512_64:
-; AVX512: # %bb.0:
-; AVX512-NEXT: pushq %rax
-; AVX512-NEXT: vmovups (%rdi), %ymm0
-; AVX512-NEXT: vmovups 32(%rdi), %ymm1
-; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; AVX512-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT: # kill: def $esi killed $esi def $rsi
-; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT: shrl $3, %esi
-; AVX512-NEXT: andl $56, %esi
-; AVX512-NEXT: movq -128(%rsp,%rsi), %rax
-; AVX512-NEXT: popq %rcx
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; X64-LABEL: extractSub512_64:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: shrl $3, %esi
+; X64-NEXT: andl $56, %esi
+; X64-NEXT: movq (%rdi,%rsi), %rax
+; X64-NEXT: retq
%idx_bounds = and i32 %idx, 511
%idx_align = and i32 %idx_bounds, -64
%sh = zext nneg i32 %idx_align to i512
@@ -603,143 +170,35 @@ define i64 @extractSub512_64(ptr %word, i32 %idx) nounwind {
define i128 @extractSub512_128(ptr %word, i32 %idx) nounwind {
; X86-LABEL: extractSub512_128:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
-; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: andl $-16, %esp
-; X86-NEXT: subl $192, %esp
-; X86-NEXT: movl 12(%ebp), %eax
-; X86-NEXT: movl (%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 4(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 8(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 12(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 16(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 20(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 24(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 28(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 32(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 36(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 40(%eax), %ebx
-; X86-NEXT: movl 44(%eax), %edi
-; X86-NEXT: movl 48(%eax), %esi
-; X86-NEXT: movl 52(%eax), %edx
-; X86-NEXT: movl 56(%eax), %ecx
-; X86-NEXT: movl 60(%eax), %eax
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %edi
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: shrl $3, %edi
-; X86-NEXT: andl $48, %edi
-; X86-NEXT: movl 48(%esp,%edi), %ecx
-; X86-NEXT: movl 52(%esp,%edi), %edx
-; X86-NEXT: movl 56(%esp,%edi), %esi
-; X86-NEXT: movl 60(%esp,%edi), %edi
-; X86-NEXT: movl %edi, 12(%eax)
-; X86-NEXT: movl %esi, 8(%eax)
-; X86-NEXT: movl %edx, 4(%eax)
-; X86-NEXT: movl %ecx, (%eax)
-; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: shrl $3, %edx
+; X86-NEXT: andl $48, %edx
+; X86-NEXT: movl (%ecx,%edx), %esi
+; X86-NEXT: movl 4(%ecx,%edx), %edi
+; X86-NEXT: movl 8(%ecx,%edx), %ebx
+; X86-NEXT: movl 12(%ecx,%edx), %ecx
+; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: movl %ebx, 8(%eax)
+; X86-NEXT: movl %edi, 4(%eax)
+; X86-NEXT: movl %esi, (%eax)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
-; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
-; SSE-LABEL: extractSub512_128:
-; SSE: # %bb.0:
-; SSE-NEXT: pushq %rax
-; SSE-NEXT: # kill: def $esi killed $esi def $rsi
-; SSE-NEXT: movups (%rdi), %xmm0
-; SSE-NEXT: movups 16(%rdi), %xmm1
-; SSE-NEXT: movups 32(%rdi), %xmm2
-; SSE-NEXT: movups 48(%rdi), %xmm3
-; SSE-NEXT: xorps %xmm4, %xmm4
-; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm3, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: shrl $3, %esi
-; SSE-NEXT: andl $48, %esi
-; SSE-NEXT: movq -128(%rsp,%rsi), %rax
-; SSE-NEXT: movq -120(%rsp,%rsi), %rdx
-; SSE-NEXT: popq %rcx
-; SSE-NEXT: retq
-;
-; AVX-LABEL: extractSub512_128:
-; AVX: # %bb.0:
-; AVX-NEXT: pushq %rax
-; AVX-NEXT: # kill: def $esi killed $esi def $rsi
-; AVX-NEXT: vmovups (%rdi), %ymm0
-; AVX-NEXT: vmovups 32(%rdi), %ymm1
-; AVX-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
-; AVX-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
-; AVX-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
-; AVX-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
-; AVX-NEXT: shrl $3, %esi
-; AVX-NEXT: andl $48, %esi
-; AVX-NEXT: movq -128(%rsp,%rsi), %rax
-; AVX-NEXT: movq -120(%rsp,%rsi), %rdx
-; AVX-NEXT: popq %rcx
-; AVX-NEXT: vzeroupper
-; AVX-NEXT: retq
+; X64-LABEL: extractSub512_128:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: shrl $3, %esi
+; X64-NEXT: andl $48, %esi
+; X64-NEXT: movq (%rdi,%rsi), %rax
+; X64-NEXT: movq 8(%rdi,%rsi), %rdx
+; X64-NEXT: retq
%idx_bounds = and i32 %idx, 511
%idx_align = and i32 %idx_bounds, -128
%sh = zext nneg i32 %idx_align to i512
@@ -752,916 +211,21 @@ define i128 @extractSub512_128(ptr %word, i32 %idx) nounwind {
define i64 @extractSub4096_64(ptr %word, i32 %idx) nounwind {
; X86-LABEL: extractSub4096_64:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebp
-; X86-NEXT: movl %esp, %ebp
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: andl $-16, %esp
-; X86-NEXT: subl $1536, %esp # imm = 0x600
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 4(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 8(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 12(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 16(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 20(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 24(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 28(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 32(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 36(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 40(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 44(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 48(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 52(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 56(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 60(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 64(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 68(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 72(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 76(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 80(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 84(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 88(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 92(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 96(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 100(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 104(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 108(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 112(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 116(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 120(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 124(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 128(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 132(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 136(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 140(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 144(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 148(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 152(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 156(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 160(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 164(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 168(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 172(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 176(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 180(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 184(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 188(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 192(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 196(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 200(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 204(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 208(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 212(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 216(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 220(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 224(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 228(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 232(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 236(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 240(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 244(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 248(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 252(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 256(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 260(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 264(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 268(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 272(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 276(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 280(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 284(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 288(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 292(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 296(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 300(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 304(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 308(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 312(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 316(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 320(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 324(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 328(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 332(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 336(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 340(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 344(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 348(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 352(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 356(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 360(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 364(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 368(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 372(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 376(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 380(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl (%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 384(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 388(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 392(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 396(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 400(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 404(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 408(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 412(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 416(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 420(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 424(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 428(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 432(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 436(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 440(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 444(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 448(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 452(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 456(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 460(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 464(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 468(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 472(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 476(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 480(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 484(%eax), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 488(%eax), %ebx
-; X86-NEXT: movl 492(%eax), %edi
-; X86-NEXT: movl 496(%eax), %esi
-; X86-NEXT: movl 500(%eax), %edx
-; X86-NEXT: movl 504(%eax), %ecx
-; X86-NEXT: movl 508(%eax), %eax
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $4032, %ecx # imm = 0xFC0
-; X86-NEXT: andl 12(%ebp), %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: shrl $3, %ecx
-; X86-NEXT: movl 496(%esp,%ecx), %eax
-; X86-NEXT: movl 500(%esp,%ecx), %edx
-; X86-NEXT: leal -12(%ebp), %esp
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
-; X86-NEXT: popl %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $4032, %edx # imm = 0xFC0
+; X86-NEXT: andl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: shrl $3, %edx
+; X86-NEXT: movl (%ecx,%edx), %eax
+; X86-NEXT: movl 4(%ecx,%edx), %edx
; X86-NEXT: retl
;
-; SSE-LABEL: extractSub4096_64:
-; SSE: # %bb.0:
-; SSE-NEXT: subq $1176, %rsp # imm = 0x498
-; SSE-NEXT: # kill: def $esi killed $esi def $rsi
-; SSE-NEXT: movups (%rdi), %xmm0
-; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; SSE-NEXT: movups 16(%rdi), %xmm0
-; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; SSE-NEXT: movups 32(%rdi), %xmm0
-; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; SSE-NEXT: movups 48(%rdi), %xmm0
-; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; SSE-NEXT: movups 64(%rdi), %xmm0
-; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; SSE-NEXT: movups 80(%rdi), %xmm0
-; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; SSE-NEXT: movups 96(%rdi), %xmm0
-; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; SSE-NEXT: movups 112(%rdi), %xmm0
-; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; SSE-NEXT: movups 128(%rdi), %xmm0
-; SSE-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; SSE-NEXT: movups 144(%rdi), %xmm0
-; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; SSE-NEXT: movups 160(%rdi), %xmm0
-; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; SSE-NEXT: movups 176(%rdi), %xmm0
-; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; SSE-NEXT: movups 192(%rdi), %xmm0
-; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; SSE-NEXT: movups 208(%rdi), %xmm0
-; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; SSE-NEXT: movups 224(%rdi), %xmm0
-; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; SSE-NEXT: movups 240(%rdi), %xmm0
-; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; SSE-NEXT: movups 256(%rdi), %xmm0
-; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; SSE-NEXT: movups 272(%rdi), %xmm15
-; SSE-NEXT: movups 288(%rdi), %xmm14
-; SSE-NEXT: movups 304(%rdi), %xmm13
-; SSE-NEXT: movups 320(%rdi), %xmm12
-; SSE-NEXT: movups 336(%rdi), %xmm11
-; SSE-NEXT: movups 352(%rdi), %xmm10
-; SSE-NEXT: movups 368(%rdi), %xmm9
-; SSE-NEXT: movups 384(%rdi), %xmm8
-; SSE-NEXT: movups 400(%rdi), %xmm7
-; SSE-NEXT: movups 416(%rdi), %xmm6
-; SSE-NEXT: movups 432(%rdi), %xmm5
-; SSE-NEXT: movups 448(%rdi), %xmm4
-; SSE-NEXT: movups 464(%rdi), %xmm3
-; SSE-NEXT: movups 480(%rdi), %xmm2
-; SSE-NEXT: movups 496(%rdi), %xmm1
-; SSE-NEXT: xorps %xmm0, %xmm0
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm2, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm3, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm4, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm5, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm6, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm7, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm8, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm9, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm10, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm11, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm12, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm13, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm14, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps %xmm15, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; SSE-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
-; SSE-NEXT: andl $4032, %esi # imm = 0xFC0
-; SSE-NEXT: shrl $3, %esi
-; SSE-NEXT: movq 144(%rsp,%rsi), %rax
-; SSE-NEXT: addq $1176, %rsp # imm = 0x498
-; SSE-NEXT: retq
-;
-; AVX2-LABEL: extractSub4096_64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: subq $936, %rsp # imm = 0x3A8
-; AVX2-NEXT: vmovups (%rdi), %ymm0
-; AVX2-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX2-NEXT: vmovups 32(%rdi), %ymm1
-; AVX2-NEXT: vmovups 64(%rdi), %ymm2
-; AVX2-NEXT: vmovups 96(%rdi), %ymm3
-; AVX2-NEXT: vmovups 128(%rdi), %ymm4
-; AVX2-NEXT: vmovups 160(%rdi), %ymm5
-; AVX2-NEXT: vmovups 192(%rdi), %ymm6
-; AVX2-NEXT: vmovups 224(%rdi), %ymm7
-; AVX2-NEXT: vmovups 256(%rdi), %ymm8
-; AVX2-NEXT: vmovups 288(%rdi), %ymm9
-; AVX2-NEXT: vmovups 320(%rdi), %ymm10
-; AVX2-NEXT: vmovups 352(%rdi), %ymm11
-; AVX2-NEXT: vmovups 384(%rdi), %ymm12
-; AVX2-NEXT: vmovups 416(%rdi), %ymm13
-; AVX2-NEXT: vmovups 448(%rdi), %ymm14
-; AVX2-NEXT: vmovups 480(%rdi), %ymm15
-; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm15, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm14, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm13, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm12, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm11, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm10, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm9, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm8, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm7, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm6, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm5, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm4, {{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm3, (%rsp)
-; AVX2-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
-; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
-; AVX2-NEXT: # kill: def $esi killed $esi def $rsi
-; AVX2-NEXT: andl $4032, %esi # imm = 0xFC0
-; AVX2-NEXT: shrl $3, %esi
-; AVX2-NEXT: movq -96(%rsp,%rsi), %rax
-; AVX2-NEXT: addq $936, %rsp # imm = 0x3A8
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: extractSub4096_64:
-; AVX512: # %bb.0:
-; AVX512-NEXT: subq $904, %rsp # imm = 0x388
-; AVX512-NEXT: # kill: def $esi killed $esi def $rsi
-; AVX512-NEXT: vmovups (%rdi), %ymm0
-; AVX512-NEXT: vmovups 32(%rdi), %ymm1
-; AVX512-NEXT: vmovups 64(%rdi), %ymm2
-; AVX512-NEXT: vmovups 96(%rdi), %ymm3
-; AVX512-NEXT: vmovups 128(%rdi), %ymm4
-; AVX512-NEXT: vmovups 160(%rdi), %ymm5
-; AVX512-NEXT: vmovups 192(%rdi), %ymm6
-; AVX512-NEXT: vmovups 224(%rdi), %ymm7
-; AVX512-NEXT: vmovups 256(%rdi), %ymm8
-; AVX512-NEXT: vmovups 288(%rdi), %ymm9
-; AVX512-NEXT: vmovups 320(%rdi), %ymm10
-; AVX512-NEXT: vmovups 352(%rdi), %ymm11
-; AVX512-NEXT: vmovups 384(%rdi), %ymm12
-; AVX512-NEXT: vmovups 416(%rdi), %ymm13
-; AVX512-NEXT: andl $4032, %esi # imm = 0xFC0
-; AVX512-NEXT: vmovups 448(%rdi), %ymm14
-; AVX512-NEXT: vmovups 480(%rdi), %ymm15
-; AVX512-NEXT: vxorps %xmm16, %xmm16, %xmm16
-; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm16, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm15, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm14, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm13, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm12, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm11, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm10, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm9, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm8, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm7, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm6, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm5, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm4, (%rsp)
-; AVX512-NEXT: vmovups %ymm3, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT: shrl $3, %esi
-; AVX512-NEXT: movq -128(%rsp,%rsi), %rax
-; AVX512-NEXT: addq $904, %rsp # imm = 0x388
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; X64-LABEL: extractSub4096_64:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: andl $4032, %esi # imm = 0xFC0
+; X64-NEXT: shrl $3, %esi
+; X64-NEXT: movq (%rdi,%rsi), %rax
+; X64-NEXT: retq
%idx_bounds = and i32 %idx, 4095
%idx_align = and i32 %idx_bounds, -64
%sh = zext nneg i32 %idx_align to i4096
diff --git a/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll b/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll
index 81c4d5d..c3054a3 100644
--- a/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll
+++ b/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll
@@ -962,39 +962,22 @@ define void @load_8byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i6
}
define void @load_1byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
-; X64-NO-BMI2-LABEL: load_1byte_chunk_of_32byte_alloca_with_zero_upper_half:
-; X64-NO-BMI2: # %bb.0:
-; X64-NO-BMI2-NEXT: movups (%rdi), %xmm0
-; X64-NO-BMI2-NEXT: xorps %xmm1, %xmm1
-; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx
-; X64-NO-BMI2-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
-; X64-NO-BMI2-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
-; X64-NO-BMI2-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
-; X64-NO-BMI2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
-; X64-NO-BMI2-NEXT: movl %ecx, %eax
-; X64-NO-BMI2-NEXT: shrb $6, %al
-; X64-NO-BMI2-NEXT: movzbl %al, %eax
-; X64-NO-BMI2-NEXT: movq -72(%rsp,%rax,8), %rax
-; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NO-BMI2-NEXT: shrq %cl, %rax
-; X64-NO-BMI2-NEXT: movb %al, (%rdx)
-; X64-NO-BMI2-NEXT: retq
-;
-; X64-BMI2-LABEL: load_1byte_chunk_of_32byte_alloca_with_zero_upper_half:
-; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: movups (%rdi), %xmm0
-; X64-BMI2-NEXT: xorps %xmm1, %xmm1
-; X64-BMI2-NEXT: shll $3, %esi
-; X64-BMI2-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
-; X64-BMI2-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
-; X64-BMI2-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
-; X64-BMI2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
-; X64-BMI2-NEXT: movl %esi, %eax
-; X64-BMI2-NEXT: shrb $6, %al
-; X64-BMI2-NEXT: movzbl %al, %eax
-; X64-BMI2-NEXT: shrxq %rsi, -72(%rsp,%rax,8), %rax
-; X64-BMI2-NEXT: movb %al, (%rdx)
-; X64-BMI2-NEXT: retq
+; X64-LABEL: load_1byte_chunk_of_32byte_alloca_with_zero_upper_half:
+; X64: # %bb.0:
+; X64-NEXT: movups (%rdi), %xmm0
+; X64-NEXT: xorps %xmm1, %xmm1
+; X64-NEXT: leal (,%rsi,8), %eax
+; X64-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: shrb $6, %al
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: leaq -72(%rsp,%rax,8), %rax
+; X64-NEXT: andl $7, %esi
+; X64-NEXT: movzbl (%rsi,%rax), %eax
+; X64-NEXT: movb %al, (%rdx)
+; X64-NEXT: retq
;
; X86-NO-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca_with_zero_upper_half:
; X86-NO-BMI2-NO-SHLD: # %bb.0:
@@ -3417,7 +3400,6 @@ define void @load_32byte_chunk_of_64byte_alloca_with_zero_upper_half(ptr %src, i
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; ALL: {{.*}}
-; X64: {{.*}}
; X64-NO-SHLD: {{.*}}
; X86: {{.*}}
; X86-HAVE-BMI2-HAVE-SHLD: {{.*}}
diff --git a/llvm/test/CodeGen/X86/widen-load-of-small-alloca.ll b/llvm/test/CodeGen/X86/widen-load-of-small-alloca.ll
index 8d36eef..84c2cc6 100644
--- a/llvm/test/CodeGen/X86/widen-load-of-small-alloca.ll
+++ b/llvm/test/CodeGen/X86/widen-load-of-small-alloca.ll
@@ -1220,41 +1220,23 @@ define void @load_8byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
; no @load_16byte_chunk_of_16byte_alloca
define void @load_1byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst) nounwind {
-; X64-NO-BMI2-LABEL: load_1byte_chunk_of_32byte_alloca:
-; X64-NO-BMI2: # %bb.0:
-; X64-NO-BMI2-NEXT: movups (%rdi), %xmm0
-; X64-NO-BMI2-NEXT: movups 16(%rdi), %xmm1
-; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx
-; X64-NO-BMI2-NEXT: xorps %xmm2, %xmm2
-; X64-NO-BMI2-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
-; X64-NO-BMI2-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
-; X64-NO-BMI2-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
-; X64-NO-BMI2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
-; X64-NO-BMI2-NEXT: movl %ecx, %eax
-; X64-NO-BMI2-NEXT: shrb $6, %al
-; X64-NO-BMI2-NEXT: movzbl %al, %eax
-; X64-NO-BMI2-NEXT: movq -72(%rsp,%rax,8), %rax
-; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NO-BMI2-NEXT: shrq %cl, %rax
-; X64-NO-BMI2-NEXT: movb %al, (%rdx)
-; X64-NO-BMI2-NEXT: retq
-;
-; X64-BMI2-LABEL: load_1byte_chunk_of_32byte_alloca:
-; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: movups (%rdi), %xmm0
-; X64-BMI2-NEXT: movups 16(%rdi), %xmm1
-; X64-BMI2-NEXT: shll $3, %esi
-; X64-BMI2-NEXT: xorps %xmm2, %xmm2
-; X64-BMI2-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
-; X64-BMI2-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
-; X64-BMI2-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
-; X64-BMI2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
-; X64-BMI2-NEXT: movl %esi, %eax
-; X64-BMI2-NEXT: shrb $6, %al
-; X64-BMI2-NEXT: movzbl %al, %eax
-; X64-BMI2-NEXT: shrxq %rsi, -72(%rsp,%rax,8), %rax
-; X64-BMI2-NEXT: movb %al, (%rdx)
-; X64-BMI2-NEXT: retq
+; X64-LABEL: load_1byte_chunk_of_32byte_alloca:
+; X64: # %bb.0:
+; X64-NEXT: movups (%rdi), %xmm0
+; X64-NEXT: movups 16(%rdi), %xmm1
+; X64-NEXT: leal (,%rsi,8), %eax
+; X64-NEXT: xorps %xmm2, %xmm2
+; X64-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: shrb $6, %al
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: leaq -72(%rsp,%rax,8), %rax
+; X64-NEXT: andl $7, %esi
+; X64-NEXT: movzbl (%rsi,%rax), %eax
+; X64-NEXT: movb %al, (%rdx)
+; X64-NEXT: retq
;
; X86-NO-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca:
; X86-NO-BMI2-NO-SHLD: # %bb.0:
@@ -2156,7 +2138,6 @@ define void @load_16byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst
; no @load_32byte_chunk_of_32byte_alloca
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; ALL: {{.*}}
-; X64: {{.*}}
; X64-NO-SHLD: {{.*}}
; X86: {{.*}}
; X86-NO-SHLD: {{.*}}
diff --git a/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt b/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt
index 054489c..f5cb4b7 100644
--- a/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt
+++ b/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt
@@ -286,6 +286,9 @@
#CHECK: xvmulhuh 4, 5, 7
0xf0,0x85,0x3b,0xd0
+#CHECK: mtlpl 3, 4
+0x7c,0x80,0x1a,0x26
+
#CHECK: xxmulmul 8, 3, 4, 2
0xed,0x03,0x22,0x08
diff --git a/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt b/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt
index 17d1413..f0df8ce 100644
--- a/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt
+++ b/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt
@@ -280,6 +280,9 @@
#CHECK: xvmulhuh 4, 5, 7
0xd0,0x3b,0x85,0xf0
+#CHECK: mtlpl 3, 4
+0x26,0x1a,0x80,0x7c
+
#CHECK: xxmulmul 8, 3, 4, 2
0x08,0x22,0x03,0xed
diff --git a/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s b/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s
index e5bc1f4..bc0683e 100644
--- a/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s
+++ b/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s
@@ -403,6 +403,10 @@
#CHECK-BE: xvmulhuh 4, 5, 7 # encoding: [0xf0,0x85,0x3b,0xd0]
#CHECK-LE: xvmulhuh 4, 5, 7 # encoding: [0xd0,0x3b,0x85,0xf0]
+ mtlpl 3, 4
+#CHECK-BE: mtlpl 3, 4 # encoding: [0x7c,0x80,0x1a,0x26]
+#CHECK-LE: mtlpl 3, 4 # encoding: [0x26,0x1a,0x80,0x7c]
+
xxmulmul 8, 3, 4, 2
#CHECK-BE: xxmulmul 8, 3, 4, 2 # encoding: [0xed,0x03,0x22,0x08]
#CHECK-LE: xxmulmul 8, 3, 4, 2 # encoding: [0x08,0x22,0x03,0xed]
diff --git a/llvm/test/Transforms/InstCombine/assume.ll b/llvm/test/Transforms/InstCombine/assume.ll
index 7b0b871..cc87d65 100644
--- a/llvm/test/Transforms/InstCombine/assume.ll
+++ b/llvm/test/Transforms/InstCombine/assume.ll
@@ -10,8 +10,8 @@ declare void @llvm.assume(i1) #1
; Check that the assume has not been removed:
-define i32 @foo1(ptr %a) #0 {
-; DEFAULT-LABEL: @foo1(
+define i32 @align_to_bundle(ptr %a) #0 {
+; DEFAULT-LABEL: @align_to_bundle(
; DEFAULT-NEXT: [[T0:%.*]] = load i32, ptr [[A:%.*]], align 4
; DEFAULT-NEXT: [[PTRINT:%.*]] = ptrtoint ptr [[A]] to i64
; DEFAULT-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31
@@ -19,7 +19,7 @@ define i32 @foo1(ptr %a) #0 {
; DEFAULT-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]])
; DEFAULT-NEXT: ret i32 [[T0]]
;
-; BUNDLES-LABEL: @foo1(
+; BUNDLES-LABEL: @align_to_bundle(
; BUNDLES-NEXT: [[T0:%.*]] = load i32, ptr [[A:%.*]], align 4
; BUNDLES-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 32) ]
; BUNDLES-NEXT: ret i32 [[T0]]
@@ -32,6 +32,28 @@ define i32 @foo1(ptr %a) #0 {
ret i32 %t0
}
+define i32 @align_to_bundle_ptrtoaddr(ptr %a) #0 {
+; DEFAULT-LABEL: @align_to_bundle_ptrtoaddr(
+; DEFAULT-NEXT: [[T0:%.*]] = load i32, ptr [[A:%.*]], align 4
+; DEFAULT-NEXT: [[PTRINT:%.*]] = ptrtoaddr ptr [[A]] to i64
+; DEFAULT-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31
+; DEFAULT-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0
+; DEFAULT-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]])
+; DEFAULT-NEXT: ret i32 [[T0]]
+;
+; BUNDLES-LABEL: @align_to_bundle_ptrtoaddr(
+; BUNDLES-NEXT: [[T0:%.*]] = load i32, ptr [[A:%.*]], align 4
+; BUNDLES-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 32) ]
+; BUNDLES-NEXT: ret i32 [[T0]]
+;
+ %t0 = load i32, ptr %a, align 4
+ %ptrint = ptrtoaddr ptr %a to i64
+ %maskedptr = and i64 %ptrint, 31
+ %maskcond = icmp eq i64 %maskedptr, 0
+ tail call void @llvm.assume(i1 %maskcond)
+ ret i32 %t0
+}
+
define i32 @align_assume_trunc_cond(ptr %a) #0 {
; DEFAULT-LABEL: @align_assume_trunc_cond(
; DEFAULT-NEXT: [[T0:%.*]] = load i32, ptr [[A:%.*]], align 4
diff --git a/llvm/test/Transforms/InstCombine/ptrtoaddr.ll b/llvm/test/Transforms/InstCombine/ptrtoaddr.ll
index a7434a2..adf3aa1 100644
--- a/llvm/test/Transforms/InstCombine/ptrtoaddr.ll
+++ b/llvm/test/Transforms/InstCombine/ptrtoaddr.ll
@@ -237,3 +237,75 @@ define ptr addrspace(1) @gep_sub_ptrtoaddr_different_obj_addrsize(ptr addrspace(
call void @use.i32(i32 %addr)
ret ptr addrspace(1) %gep
}
+
+define i64 @ptrtoaddr_of_ptrmask(ptr %p, i64 %mask) {
+; CHECK-LABEL: define i64 @ptrtoaddr_of_ptrmask(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[MASK:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoaddr ptr [[P]] to i64
+; CHECK-NEXT: [[ADDR:%.*]] = and i64 [[MASK]], [[TMP1]]
+; CHECK-NEXT: ret i64 [[ADDR]]
+;
+ %masked = call ptr @llvm.ptrmask(ptr %p, i64 %mask)
+ %addr = ptrtoaddr ptr %masked to i64
+ ret i64 %addr
+}
+
+define i32 @ptrtoaddr_of_ptrmask_addrsize(ptr addrspace(1) %p, i32 %mask) {
+; CHECK-LABEL: define i32 @ptrtoaddr_of_ptrmask_addrsize(
+; CHECK-SAME: ptr addrspace(1) [[P:%.*]], i32 [[MASK:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoaddr ptr addrspace(1) [[P]] to i32
+; CHECK-NEXT: [[ADDR:%.*]] = and i32 [[MASK]], [[TMP1]]
+; CHECK-NEXT: ret i32 [[ADDR]]
+;
+ %masked = call ptr addrspace(1) @llvm.ptrmask(ptr addrspace(1) %p, i32 %mask)
+ %addr = ptrtoaddr ptr addrspace(1) %masked to i32
+ ret i32 %addr
+}
+
+define i64 @ptrtoaddr_of_gep_of_inttoptr(i64 %int, i64 %offset) {
+; CHECK-LABEL: define i64 @ptrtoaddr_of_gep_of_inttoptr(
+; CHECK-SAME: i64 [[INT:%.*]], i64 [[OFFSET:%.*]]) {
+; CHECK-NEXT: [[ADDR:%.*]] = add i64 [[INT]], [[OFFSET]]
+; CHECK-NEXT: ret i64 [[ADDR]]
+;
+ %ptr = inttoptr i64 %int to ptr
+ %gep = getelementptr i8, ptr %ptr, i64 %offset
+ %addr = ptrtoaddr ptr %gep to i64
+ ret i64 %addr
+}
+
+; FIXME: This could be supported by truncating %int before performing the
+; arithmetic.
+define i32 @ptrtoaddr_of_gep_of_inttoptr_addrsize(i64 %int, i32 %offset) {
+; CHECK-LABEL: define i32 @ptrtoaddr_of_gep_of_inttoptr_addrsize(
+; CHECK-SAME: i64 [[INT:%.*]], i32 [[OFFSET:%.*]]) {
+; CHECK-NEXT: [[PTR:%.*]] = inttoptr i64 [[INT]] to ptr addrspace(1)
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr addrspace(1) [[PTR]], i32 [[OFFSET]]
+; CHECK-NEXT: [[ADDR:%.*]] = ptrtoaddr ptr addrspace(1) [[GEP]] to i32
+; CHECK-NEXT: ret i32 [[ADDR]]
+;
+ %ptr = inttoptr i64 %int to ptr addrspace(1)
+ %gep = getelementptr i8, ptr addrspace(1) %ptr, i32 %offset
+ %addr = ptrtoaddr ptr addrspace(1) %gep to i32
+ ret i32 %addr
+}
+
+define i64 @ptrtoaddr_of_gep_of_null(i64 %offset) {
+; CHECK-LABEL: define i64 @ptrtoaddr_of_gep_of_null(
+; CHECK-SAME: i64 [[OFFSET:%.*]]) {
+; CHECK-NEXT: ret i64 [[OFFSET]]
+;
+ %gep = getelementptr i8, ptr null, i64 %offset
+ %addr = ptrtoaddr ptr %gep to i64
+ ret i64 %addr
+}
+
+define i32 @ptrtoaddr_of_gep_of_null_addrsize(i32 %offset) {
+; CHECK-LABEL: define i32 @ptrtoaddr_of_gep_of_null_addrsize(
+; CHECK-SAME: i32 [[OFFSET:%.*]]) {
+; CHECK-NEXT: ret i32 [[OFFSET]]
+;
+ %gep = getelementptr i8, ptr addrspace(1) null, i32 %offset
+ %addr = ptrtoaddr ptr addrspace(1) %gep to i32
+ ret i32 %addr
+}
diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vecreduce.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vecreduce.ll
index 9f9e3f9..77a7f0d 100644
--- a/llvm/test/Transforms/InstSimplify/ConstProp/vecreduce.ll
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/vecreduce.ll
@@ -1,26 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instsimplify -S | FileCheck %s
-; RUN: opt < %s -passes=instsimplify -use-constant-int-for-fixed-length-splat -S | FileCheck %s
-
-declare i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a)
-declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a)
-declare i32 @llvm.vector.reduce.mul.v1i32(<1 x i32> %a)
-declare i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> %a)
-declare i32 @llvm.vector.reduce.and.v1i32(<1 x i32> %a)
-declare i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %a)
-declare i32 @llvm.vector.reduce.or.v1i32(<1 x i32> %a)
-declare i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %a)
-declare i32 @llvm.vector.reduce.xor.v1i32(<1 x i32> %a)
-declare i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %a)
-declare i32 @llvm.vector.reduce.smin.v1i32(<1 x i32> %a)
-declare i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> %a)
-declare i32 @llvm.vector.reduce.smax.v1i32(<1 x i32> %a)
-declare i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> %a)
-declare i32 @llvm.vector.reduce.umin.v1i32(<1 x i32> %a)
-declare i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> %a)
-declare i32 @llvm.vector.reduce.umax.v1i32(<1 x i32> %a)
-declare i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %a)
-
+; RUN: opt < %s -passes=instsimplify -use-constant-int-for-fixed-length-splat -use-constant-int-for-scalable-splat -S | FileCheck %s
define i32 @add_0() {
; CHECK-LABEL: @add_0(
@@ -30,6 +10,15 @@ define i32 @add_0() {
ret i32 %x
}
+define i32 @add_0_scalable_vector() {
+; CHECK-LABEL: @add_0_scalable_vector(
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %x = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> zeroinitializer)
+ ret i32 %x
+}
+
define i32 @add_1() {
; CHECK-LABEL: @add_1(
; CHECK-NEXT: ret i32 8
@@ -38,6 +27,15 @@ define i32 @add_1() {
ret i32 %x
}
+define i32 @add_1_scalable_vector() {
+; CHECK-LABEL: @add_1_scalable_vector(
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> splat (i32 1))
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %x = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> splat (i32 1))
+ ret i32 %x
+}
+
define i32 @add_inc() {
; CHECK-LABEL: @add_inc(
; CHECK-NEXT: ret i32 18
@@ -63,8 +61,17 @@ define i32 @add_undef() {
ret i32 %x
}
-define i32 @add_undef1() {
-; CHECK-LABEL: @add_undef1(
+define i32 @add_undef_scalable_vector() {
+; CHECK-LABEL: @add_undef_scalable_vector(
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> undef)
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %x = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> undef)
+ ret i32 %x
+}
+
+define i32 @add_undef_elt() {
+; CHECK-LABEL: @add_undef_elt(
; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> <i32 1, i32 1, i32 undef, i32 1, i32 1, i32 1, i32 1, i32 1>)
; CHECK-NEXT: ret i32 [[X]]
;
@@ -80,8 +87,17 @@ define i32 @add_poison() {
ret i32 %x
}
-define i32 @add_poison1() {
-; CHECK-LABEL: @add_poison1(
+define i32 @add_poison_scalable_vector() {
+; CHECK-LABEL: @add_poison_scalable_vector(
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> poison)
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %x = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> poison)
+ ret i32 %x
+}
+
+define i32 @add_poison_elt() {
+; CHECK-LABEL: @add_poison_elt(
; CHECK-NEXT: ret i32 poison
;
%x = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> <i32 1, i32 1, i32 poison, i32 1, i32 1, i32 42, i32 1, i32 1>)
@@ -105,6 +121,15 @@ define i32 @mul_0() {
ret i32 %x
}
+define i32 @mul_0_scalable_vector() {
+; CHECK-LABEL: @mul_0_scalable_vector(
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.mul.nxv8i32(<vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %x = call i32 @llvm.vector.reduce.mul.nxv8i32(<vscale x 8 x i32> zeroinitializer)
+ ret i32 %x
+}
+
define i32 @mul_1() {
; CHECK-LABEL: @mul_1(
; CHECK-NEXT: ret i32 1
@@ -113,6 +138,15 @@ define i32 @mul_1() {
ret i32 %x
}
+define i32 @mul_1_scalable_vector() {
+; CHECK-LABEL: @mul_1_scalable_vector(
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.mul.nxv8i32(<vscale x 8 x i32> splat (i32 1))
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %x = call i32 @llvm.vector.reduce.mul.nxv8i32(<vscale x 8 x i32> splat (i32 1))
+ ret i32 %x
+}
+
define i32 @mul_inc() {
; CHECK-LABEL: @mul_inc(
; CHECK-NEXT: ret i32 40320
@@ -138,8 +172,17 @@ define i32 @mul_undef() {
ret i32 %x
}
-define i32 @mul_undef1() {
-; CHECK-LABEL: @mul_undef1(
+define i32 @mul_undef_scalable_vector() {
+; CHECK-LABEL: @mul_undef_scalable_vector(
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.mul.nxv8i32(<vscale x 8 x i32> undef)
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %x = call i32 @llvm.vector.reduce.mul.nxv8i32(<vscale x 8 x i32> undef)
+ ret i32 %x
+}
+
+define i32 @mul_undef_elt() {
+; CHECK-LABEL: @mul_undef_elt(
; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> <i32 1, i32 1, i32 undef, i32 1, i32 1, i32 1, i32 1, i32 1>)
; CHECK-NEXT: ret i32 [[X]]
;
@@ -155,8 +198,17 @@ define i32 @mul_poison() {
ret i32 %x
}
-define i32 @mul_poison1() {
-; CHECK-LABEL: @mul_poison1(
+define i32 @mul_poison_scalable_vector() {
+; CHECK-LABEL: @mul_poison_scalable_vector(
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.mul.nxv8i32(<vscale x 8 x i32> poison)
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %x = call i32 @llvm.vector.reduce.mul.nxv8i32(<vscale x 8 x i32> poison)
+ ret i32 %x
+}
+
+define i32 @mul_poison_elt() {
+; CHECK-LABEL: @mul_poison_elt(
; CHECK-NEXT: ret i32 poison
;
%x = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> <i32 0, i32 1, i32 poison, i32 1, i32 1, i32 1, i32 1, i32 1>)
@@ -171,6 +223,15 @@ define i32 @and_0() {
ret i32 %x
}
+define i32 @and_0_scalable_vector() {
+; CHECK-LABEL: @and_0_scalable_vector(
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %x = call i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32> zeroinitializer)
+ ret i32 %x
+}
+
define i32 @and_1() {
; CHECK-LABEL: @and_1(
; CHECK-NEXT: ret i32 1
@@ -179,6 +240,15 @@ define i32 @and_1() {
ret i32 %x
}
+define i32 @and_1_scalable_vector() {
+; CHECK-LABEL: @and_1_scalable_vector(
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32> splat (i32 1))
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %x = call i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32> splat (i32 1))
+ ret i32 %x
+}
+
define i32 @and_inc() {
; CHECK-LABEL: @and_inc(
; CHECK-NEXT: ret i32 0
@@ -204,8 +274,17 @@ define i32 @and_undef() {
ret i32 %x
}
-define i32 @and_undef1() {
-; CHECK-LABEL: @and_undef1(
+define i32 @and_undef_scalable_vector() {
+; CHECK-LABEL: @and_undef_scalable_vector(
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32> undef)
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %x = call i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32> undef)
+ ret i32 %x
+}
+
+define i32 @and_undef_elt() {
+; CHECK-LABEL: @and_undef_elt(
; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> <i32 1, i32 1, i32 undef, i32 1, i32 1, i32 1, i32 1, i32 1>)
; CHECK-NEXT: ret i32 [[X]]
;
@@ -221,8 +300,17 @@ define i32 @and_poison() {
ret i32 %x
}
-define i32 @and_poison1() {
-; CHECK-LABEL: @and_poison1(
+define i32 @and_poison_scalable_vector() {
+; CHECK-LABEL: @and_poison_scalable_vector(
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32> poison)
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %x = call i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32> poison)
+ ret i32 %x
+}
+
+define i32 @and_poison_elt() {
+; CHECK-LABEL: @and_poison_elt(
; CHECK-NEXT: ret i32 poison
;
%x = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> <i32 -1, i32 1, i32 poison, i32 1, i32 1, i32 1, i32 1, i32 1>)
@@ -237,6 +325,15 @@ define i32 @or_0() {
ret i32 %x
}
+define i32 @or_0_scalable_vector() {
+; CHECK-LABEL: @or_0_scalable_vector(
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.or.nxv8i32(<vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %x = call i32 @llvm.vector.reduce.or.nxv8i32(<vscale x 8 x i32> zeroinitializer)
+ ret i32 %x
+}
+
define i32 @or_1() {
; CHECK-LABEL: @or_1(
; CHECK-NEXT: ret i32 1
@@ -245,6 +342,15 @@ define i32 @or_1() {
ret i32 %x
}
+define i32 @or_1_scalable_vector() {
+; CHECK-LABEL: @or_1_scalable_vector(
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.or.nxv8i32(<vscale x 8 x i32> splat (i32 1))
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %x = call i32 @llvm.vector.reduce.or.nxv8i32(<vscale x 8 x i32> splat (i32 1))
+ ret i32 %x
+}
+
define i32 @or_inc() {
; CHECK-LABEL: @or_inc(
; CHECK-NEXT: ret i32 -1
@@ -270,8 +376,17 @@ define i32 @or_undef() {
ret i32 %x
}
-define i32 @or_undef1() {
-; CHECK-LABEL: @or_undef1(
+define i32 @or_undef_scalable_vector() {
+; CHECK-LABEL: @or_undef_scalable_vector(
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.or.nxv8i32(<vscale x 8 x i32> undef)
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %x = call i32 @llvm.vector.reduce.or.nxv8i32(<vscale x 8 x i32> undef)
+ ret i32 %x
+}
+
+define i32 @or_undef_elt() {
+; CHECK-LABEL: @or_undef_elt(
; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> <i32 1, i32 1, i32 undef, i32 1, i32 1, i32 1, i32 1, i32 1>)
; CHECK-NEXT: ret i32 [[X]]
;
@@ -287,8 +402,17 @@ define i32 @or_poison() {
ret i32 %x
}
-define i32 @or_poison1() {
-; CHECK-LABEL: @or_poison1(
+define i32 @or_poison_scalable_vector() {
+; CHECK-LABEL: @or_poison_scalable_vector(
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.or.nxv8i32(<vscale x 8 x i32> poison)
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %x = call i32 @llvm.vector.reduce.or.nxv8i32(<vscale x 8 x i32> poison)
+ ret i32 %x
+}
+
+define i32 @or_poison_elt() {
+; CHECK-LABEL: @or_poison_elt(
; CHECK-NEXT: ret i32 poison
;
%x = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> <i32 1, i32 0, i32 poison, i32 1, i32 1, i32 1, i32 1, i32 1>)
@@ -303,6 +427,15 @@ define i32 @xor_0() {
ret i32 %x
}
+define i32 @xor_0_scalable_vector() {
+; CHECK-LABEL: @xor_0_scalable_vector(
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %x = call i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32> zeroinitializer)
+ ret i32 %x
+}
+
define i32 @xor_1() {
; CHECK-LABEL: @xor_1(
; CHECK-NEXT: ret i32 0
@@ -311,6 +444,15 @@ define i32 @xor_1() {
ret i32 %x
}
+define i32 @xor_1_scalable_vector() {
+; CHECK-LABEL: @xor_1_scalable_vector(
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32> splat (i32 1))
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %x = call i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32> splat (i32 1))
+ ret i32 %x
+}
+
define i32 @xor_inc() {
; CHECK-LABEL: @xor_inc(
; CHECK-NEXT: ret i32 10
@@ -336,8 +478,17 @@ define i32 @xor_undef() {
ret i32 %x
}
-define i32 @xor_undef1() {
-; CHECK-LABEL: @xor_undef1(
+define i32 @xor_undef_scalable_vector() {
+; CHECK-LABEL: @xor_undef_scalable_vector(
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32> undef)
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %x = call i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32> undef)
+ ret i32 %x
+}
+
+define i32 @xor_undef_elt() {
+; CHECK-LABEL: @xor_undef_elt(
; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> <i32 1, i32 1, i32 undef, i32 1, i32 1, i32 1, i32 1, i32 1>)
; CHECK-NEXT: ret i32 [[X]]
;
@@ -353,8 +504,17 @@ define i32 @xor_poison() {
ret i32 %x
}
-define i32 @xor_poison1() {
-; CHECK-LABEL: @xor_poison1(
+define i32 @xor_poison_scalable_vector() {
+; CHECK-LABEL: @xor_poison_scalable_vector(
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32> poison)
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %x = call i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32> poison)
+ ret i32 %x
+}
+
+define i32 @xor_poison_elt() {
+; CHECK-LABEL: @xor_poison_elt(
; CHECK-NEXT: ret i32 poison
;
%x = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> <i32 poison, i32 1, i32 undef, i32 1, i32 1, i32 1, i32 1, i32 1>)
@@ -369,6 +529,15 @@ define i32 @smin_0() {
ret i32 %x
}
+define i32 @smin_0_scalable_vector() {
+; CHECK-LABEL: @smin_0_scalable_vector(
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %x = call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> zeroinitializer)
+ ret i32 %x
+}
+
define i32 @smin_1() {
; CHECK-LABEL: @smin_1(
; CHECK-NEXT: ret i32 1
@@ -377,6 +546,15 @@ define i32 @smin_1() {
ret i32 %x
}
+define i32 @smin_1_scalable_vector() {
+; CHECK-LABEL: @smin_1_scalable_vector(
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> splat (i32 1))
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %x = call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> splat (i32 1))
+ ret i32 %x
+}
+
define i32 @smin_inc() {
; CHECK-LABEL: @smin_inc(
; CHECK-NEXT: ret i32 -6
@@ -402,8 +580,17 @@ define i32 @smin_undef() {
ret i32 %x
}
-define i32 @smin_undef1() {
-; CHECK-LABEL: @smin_undef1(
+define i32 @smin_undef_scalable_vector() {
+; CHECK-LABEL: @smin_undef_scalable_vector(
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> undef)
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %x = call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> undef)
+ ret i32 %x
+}
+
+define i32 @smin_undef_elt() {
+; CHECK-LABEL: @smin_undef_elt(
; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> <i32 1, i32 1, i32 undef, i32 1, i32 1, i32 1, i32 1, i32 1>)
; CHECK-NEXT: ret i32 [[X]]
;
@@ -419,8 +606,17 @@ define i32 @smin_poison() {
ret i32 %x
}
-define i32 @smin_poison1() {
-; CHECK-LABEL: @smin_poison1(
+define i32 @smin_poison_scalable_vector() {
+; CHECK-LABEL: @smin_poison_scalable_vector(
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> poison)
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %x = call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> poison)
+ ret i32 %x
+}
+
+define i32 @smin_poison_elt() {
+; CHECK-LABEL: @smin_poison_elt(
; CHECK-NEXT: ret i32 poison
;
%x = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> <i32 1, i32 1, i32 undef, i32 1, i32 poison, i32 1, i32 1, i32 1>)
@@ -435,6 +631,15 @@ define i32 @smax_0() {
ret i32 %x
}
+define i32 @smax_0_scalable_vector() {
+; CHECK-LABEL: @smax_0_scalable_vector(
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.smax.nxv8i32(<vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %x = call i32 @llvm.vector.reduce.smax.nxv8i32(<vscale x 8 x i32> zeroinitializer)
+ ret i32 %x
+}
+
define i32 @smax_1() {
; CHECK-LABEL: @smax_1(
; CHECK-NEXT: ret i32 1
@@ -443,6 +648,15 @@ define i32 @smax_1() {
ret i32 %x
}
+define i32 @smax_1_scalable_vector() {
+; CHECK-LABEL: @smax_1_scalable_vector(
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.smax.nxv8i32(<vscale x 8 x i32> splat (i32 1))
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %x = call i32 @llvm.vector.reduce.smax.nxv8i32(<vscale x 8 x i32> splat (i32 1))
+ ret i32 %x
+}
+
define i32 @smax_inc() {
; CHECK-LABEL: @smax_inc(
; CHECK-NEXT: ret i32 8
@@ -468,8 +682,17 @@ define i32 @smax_undef() {
ret i32 %x
}
-define i32 @smax_undef1() {
-; CHECK-LABEL: @smax_undef1(
+define i32 @smax_undef_scalable_vector() {
+; CHECK-LABEL: @smax_undef_scalable_vector(
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.smax.nxv8i32(<vscale x 8 x i32> undef)
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %x = call i32 @llvm.vector.reduce.smax.nxv8i32(<vscale x 8 x i32> undef)
+ ret i32 %x
+}
+
+define i32 @smax_undef_elt() {
+; CHECK-LABEL: @smax_undef_elt(
; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> <i32 1, i32 1, i32 undef, i32 1, i32 1, i32 1, i32 1, i32 1>)
; CHECK-NEXT: ret i32 [[X]]
;
@@ -485,8 +708,17 @@ define i32 @smax_poison() {
ret i32 %x
}
-define i32 @smax_poison1() {
-; CHECK-LABEL: @smax_poison1(
+define i32 @smax_poison_scalable_vector() {
+; CHECK-LABEL: @smax_poison_scalable_vector(
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.smax.nxv8i32(<vscale x 8 x i32> poison)
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %x = call i32 @llvm.vector.reduce.smax.nxv8i32(<vscale x 8 x i32> poison)
+ ret i32 %x
+}
+
+define i32 @smax_poison_elt() {
+; CHECK-LABEL: @smax_poison_elt(
; CHECK-NEXT: ret i32 poison
;
%x = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> <i32 1, i32 1, i32 0, i32 1, i32 1, i32 1, i32 1, i32 poison>)
@@ -501,6 +733,15 @@ define i32 @umin_0() {
ret i32 %x
}
+define i32 @umin_0_scalable_vector() {
+; CHECK-LABEL: @umin_0_scalable_vector(
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.umin.nxv8i32(<vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %x = call i32 @llvm.vector.reduce.umin.nxv8i32(<vscale x 8 x i32> zeroinitializer)
+ ret i32 %x
+}
+
define i32 @umin_1() {
; CHECK-LABEL: @umin_1(
; CHECK-NEXT: ret i32 1
@@ -509,6 +750,15 @@ define i32 @umin_1() {
ret i32 %x
}
+define i32 @umin_1_scalable_vector() {
+; CHECK-LABEL: @umin_1_scalable_vector(
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.umin.nxv8i32(<vscale x 8 x i32> splat (i32 1))
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %x = call i32 @llvm.vector.reduce.umin.nxv8i32(<vscale x 8 x i32> splat (i32 1))
+ ret i32 %x
+}
+
define i32 @umin_inc() {
; CHECK-LABEL: @umin_inc(
; CHECK-NEXT: ret i32 1
@@ -534,8 +784,17 @@ define i32 @umin_undef() {
ret i32 %x
}
-define i32 @umin_undef1() {
-; CHECK-LABEL: @umin_undef1(
+define i32 @umin_undef_scalable_vector() {
+; CHECK-LABEL: @umin_undef_scalable_vector(
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.umin.nxv8i32(<vscale x 8 x i32> undef)
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %x = call i32 @llvm.vector.reduce.umin.nxv8i32(<vscale x 8 x i32> undef)
+ ret i32 %x
+}
+
+define i32 @umin_undef_elt() {
+; CHECK-LABEL: @umin_undef_elt(
; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> <i32 1, i32 1, i32 undef, i32 1, i32 1, i32 1, i32 1, i32 1>)
; CHECK-NEXT: ret i32 [[X]]
;
@@ -551,8 +810,17 @@ define i32 @umin_poison() {
ret i32 %x
}
-define i32 @umin_poison1() {
-; CHECK-LABEL: @umin_poison1(
+define i32 @umin_poison_scalable_vector() {
+; CHECK-LABEL: @umin_poison_scalable_vector(
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.umin.nxv8i32(<vscale x 8 x i32> poison)
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %x = call i32 @llvm.vector.reduce.umin.nxv8i32(<vscale x 8 x i32> poison)
+ ret i32 %x
+}
+
+define i32 @umin_poison_elt() {
+; CHECK-LABEL: @umin_poison_elt(
; CHECK-NEXT: ret i32 poison
;
%x = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> <i32 1, i32 1, i32 -1, i32 poison, i32 1, i32 1, i32 1, i32 1>)
@@ -567,6 +835,15 @@ define i32 @umax_0() {
ret i32 %x
}
+define i32 @umax_0_scalable_vector() {
+; CHECK-LABEL: @umax_0_scalable_vector(
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.umax.nxv8i32(<vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %x = call i32 @llvm.vector.reduce.umax.nxv8i32(<vscale x 8 x i32> zeroinitializer)
+ ret i32 %x
+}
+
define i32 @umax_1() {
; CHECK-LABEL: @umax_1(
; CHECK-NEXT: ret i32 1
@@ -575,6 +852,15 @@ define i32 @umax_1() {
ret i32 %x
}
+define i32 @umax_1_scalable_vector() {
+; CHECK-LABEL: @umax_1_scalable_vector(
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.umax.nxv8i32(<vscale x 8 x i32> splat (i32 1))
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %x = call i32 @llvm.vector.reduce.umax.nxv8i32(<vscale x 8 x i32> splat (i32 1))
+ ret i32 %x
+}
+
define i32 @umax_inc() {
; CHECK-LABEL: @umax_inc(
; CHECK-NEXT: ret i32 -3
@@ -600,8 +886,17 @@ define i32 @umax_undef() {
ret i32 %x
}
-define i32 @umax_undef1() {
-; CHECK-LABEL: @umax_undef1(
+define i32 @umax_undef_scalable_vector() {
+; CHECK-LABEL: @umax_undef_scalable_vector(
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.umax.nxv8i32(<vscale x 8 x i32> undef)
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %x = call i32 @llvm.vector.reduce.umax.nxv8i32(<vscale x 8 x i32> undef)
+ ret i32 %x
+}
+
+define i32 @umax_undef_elt() {
+; CHECK-LABEL: @umax_undef_elt(
; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> <i32 1, i32 1, i32 undef, i32 1, i32 1, i32 1, i32 1, i32 1>)
; CHECK-NEXT: ret i32 [[X]]
;
@@ -617,8 +912,17 @@ define i32 @umax_poison() {
ret i32 %x
}
-define i32 @umax_poison1() {
-; CHECK-LABEL: @umax_poison1(
+define i32 @umax_poison_scalable_vector() {
+; CHECK-LABEL: @umax_poison_scalable_vector(
+; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.vector.reduce.umax.nxv8i32(<vscale x 8 x i32> poison)
+; CHECK-NEXT: ret i32 [[X]]
+;
+ %x = call i32 @llvm.vector.reduce.umax.nxv8i32(<vscale x 8 x i32> poison)
+ ret i32 %x
+}
+
+define i32 @umax_poison_elt() {
+; CHECK-LABEL: @umax_poison_elt(
; CHECK-NEXT: ret i32 poison
;
%x = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> <i32 1, i32 1, i32 poison, i32 1, i32 1, i32 poison, i32 1, i32 1>)
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll
index 7412980..12d73a3 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll
@@ -425,3 +425,79 @@ loop:
exit:
ret void
}
+
+define void @hoist_previous_value_and_operand_load(ptr %dst) {
+; CHECK-LABEL: @hoist_previous_value_and_operand_load(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[FOR_1:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[LOAD:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[FOR_2:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[OR:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[OR]] = or i32 [[FOR_1]], 3
+; CHECK-NEXT: [[ADD]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[IV]]
+; CHECK-NEXT: store i32 [[FOR_2]], ptr [[GEP]], align 4
+; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i64 [[IV]], 337
+; CHECK-NEXT: [[LOAD]] = load i32, ptr [[DST]], align 4
+; CHECK-NEXT: br i1 [[ICMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 1, %entry ], [ %add, %loop ]
+ %for.1 = phi i32 [ 1, %entry ], [ %load, %loop ]
+ %for.2 = phi i32 [ 0, %entry ], [ %or, %loop ]
+ %or = or i32 %for.1, 3
+ %add = add i64 %iv, 1
+ %gep = getelementptr inbounds i32, ptr %dst, i64 %iv
+ store i32 %for.2, ptr %gep
+ %icmp = icmp ult i64 %iv, 337
+ %load = load i32, ptr %dst
+ br i1 %icmp, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+define void @hoist_previous_value_and_operand_assume(ptr %dst) {
+; CHECK-LABEL: @hoist_previous_value_and_operand_assume(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[FOR_1:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[TRUNC:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[FOR_2:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[OR:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[OR]] = or i1 [[FOR_1]], true
+; CHECK-NEXT: [[ADD]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[IV]]
+; CHECK-NEXT: store i1 [[FOR_2]], ptr [[GEP]], align 1
+; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i64 [[IV]], 337
+; CHECK-NEXT: call void @llvm.assume(i1 [[FOR_1]])
+; CHECK-NEXT: [[TRUNC]] = trunc i64 [[IV]] to i1
+; CHECK-NEXT: br i1 [[ICMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 1, %entry ], [ %add, %loop ]
+ %for.1 = phi i1 [ 1, %entry ], [ %trunc, %loop ]
+ %for.2 = phi i1 [ 0, %entry ], [ %or, %loop ]
+ %or = or i1 %for.1, true
+ %add = add i64 %iv, 1
+ %gep = getelementptr inbounds i32, ptr %dst, i64 %iv
+ store i1 %for.2, ptr %gep
+ %icmp = icmp ult i64 %iv, 337
+ call void @llvm.assume(i1 %for.1)
+ %trunc = trunc i64 %iv to i1
+ br i1 %icmp, label %loop, label %exit
+
+exit:
+ ret void
+}
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
index 964a257..fafa82c 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
@@ -2800,6 +2800,88 @@ exit:
ret i64 %r.0.lcssa
}
+define i32 @reduction_expression_ext_mulacc_livein(ptr %a, i16 %c) {
+; CHECK-LABEL: define i32 @reduction_expression_ext_mulacc_livein(
+; CHECK-SAME: ptr [[A:%.*]], i16 [[C:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[C]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i16>
+; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i16> [[BROADCAST_SPLAT]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3]])
+; CHECK-NEXT: [[TMP5]] = add i32 [[VEC_PHI]], [[TMP4]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[FOR_EXIT:.*]]
+; CHECK: [[FOR_EXIT]]:
+; CHECK-NEXT: ret i32 [[TMP5]]
+;
+; CHECK-INTERLEAVED-LABEL: define i32 @reduction_expression_ext_mulacc_livein(
+; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], i16 [[C:%.*]]) {
+; CHECK-INTERLEAVED-NEXT: [[ENTRY:.*:]]
+; CHECK-INTERLEAVED-NEXT: br label %[[VECTOR_PH:.*]]
+; CHECK-INTERLEAVED: [[VECTOR_PH]]:
+; CHECK-INTERLEAVED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[C]], i64 0
+; CHECK-INTERLEAVED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
+; CHECK-INTERLEAVED-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK-INTERLEAVED: [[VECTOR_BODY]]:
+; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
+; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
+; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
+; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i16>
+; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[WIDE_LOAD2]] to <4 x i16>
+; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul <4 x i16> [[BROADCAST_SPLAT]], [[TMP2]]
+; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = mul <4 x i16> [[BROADCAST_SPLAT]], [[TMP3]]
+; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32>
+; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP6]])
+; CHECK-INTERLEAVED-NEXT: [[TMP8]] = add i32 [[VEC_PHI]], [[TMP7]]
+; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = zext <4 x i16> [[TMP5]] to <4 x i32>
+; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP9]])
+; CHECK-INTERLEAVED-NEXT: [[TMP11]] = add i32 [[VEC_PHI1]], [[TMP10]]
+; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; CHECK-INTERLEAVED-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
+; CHECK-INTERLEAVED: [[MIDDLE_BLOCK]]:
+; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP11]], [[TMP8]]
+; CHECK-INTERLEAVED-NEXT: br label %[[FOR_EXIT:.*]]
+; CHECK-INTERLEAVED: [[FOR_EXIT]]:
+; CHECK-INTERLEAVED-NEXT: ret i32 [[BIN_RDX]]
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %accum = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ %gep.a = getelementptr i8, ptr %a, i64 %iv
+ %load.a = load i8, ptr %gep.a, align 1
+ %ext.a = zext i8 %load.a to i16
+ %mul = mul i16 %c, %ext.a
+ %mul.ext = zext i16 %mul to i32
+ %add = add i32 %mul.ext, %accum
+ %iv.next = add i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, 1024
+ br i1 %exitcond.not, label %for.exit, label %for.body
+
+for.exit: ; preds = %for.body
+ ret i32 %add
+}
+
declare float @llvm.fmuladd.f32(float, float, float)
!6 = distinct !{!6, !7, !8}
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
index 06b0448..291ada8 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
@@ -800,3 +800,545 @@ exit:
%r.0.lcssa = phi i64 [ %rdx.next, %loop ]
ret i64 %r.0.lcssa
}
+
+define i32 @print_mulacc_extended_const(ptr %start, ptr %end) {
+; CHECK-LABEL: 'print_mulacc_extended_const'
+; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
+; CHECK-NEXT: Live-in vp<%0> = VF
+; CHECK-NEXT: Live-in vp<%1> = VF * UF
+; CHECK-NEXT: Live-in vp<%2> = vector-trip-count
+; CHECK-NEXT: vp<%3> = original trip-count
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<entry>:
+; CHECK-NEXT: EMIT vp<%3> = EXPAND SCEV (1 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64))
+; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
+; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: vp<%4> = DERIVED-IV ir<%start> + vp<%2> * ir<1>
+; CHECK-NEXT: EMIT vp<%5> = reduction-start-vector ir<0>, ir<0>, ir<1>
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
+; CHECK-NEXT: <x1> vector loop: {
+; CHECK-NEXT: vector.body:
+; CHECK-NEXT: EMIT vp<%6> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
+; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi vp<%5>, vp<%9>
+; CHECK-NEXT: vp<%7> = SCALAR-STEPS vp<%6>, ir<1>, vp<%0>
+; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<%7>
+; CHECK-NEXT: vp<%8> = vector-pointer vp<%next.gep>
+; CHECK-NEXT: WIDEN ir<%l> = load vp<%8>
+; CHECK-NEXT: EXPRESSION vp<%9> = ir<%red> + reduce.add (mul (ir<%l> zext to i32), (ir<63> zext to i32))
+; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%6>, vp<%1>
+; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%2>
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+; CHECK-NEXT: Successor(s): middle.block
+; CHECK-EMPTY:
+; CHECK-NEXT: middle.block:
+; CHECK-NEXT: EMIT vp<%11> = compute-reduction-result ir<%red>, vp<%9>
+; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<%3>, vp<%2>
+; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
+; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<exit>:
+; CHECK-NEXT: IR %red.next.lcssa = phi i32 [ %red.next, %loop ] (extra operand: vp<%11> from middle.block)
+; CHECK-NEXT: No successors
+; CHECK-EMPTY:
+; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%4>, middle.block ], [ ir<%start>, ir-bb<entry> ]
+; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<%11>, middle.block ], [ ir<0>, ir-bb<entry> ]
+; CHECK-NEXT: Successor(s): ir-bb<loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %entry ], [ %gep.iv.next, %loop ] (extra operand: vp<%bc.resume.val> from scalar.ph)
+; CHECK-NEXT: IR %red = phi i32 [ 0, %entry ], [ %red.next, %loop ] (extra operand: vp<%bc.merge.rdx> from scalar.ph)
+; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv, align 1
+; CHECK-NEXT: IR %l.ext = zext i8 %l to i32
+; CHECK-NEXT: IR %mul = mul i32 %l.ext, 63
+; CHECK-NEXT: IR %red.next = add i32 %red, %mul
+; CHECK-NEXT: IR %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1
+; CHECK-NEXT: IR %ec = icmp eq ptr %ptr.iv, %end
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+; CHECK: VPlan 'Final VPlan for VF={4},UF={1}' {
+; CHECK-NEXT: Live-in ir<%1> = original trip-count
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<entry>:
+; CHECK-NEXT: IR %start2 = ptrtoint ptr %start to i64
+; CHECK-NEXT: IR %end1 = ptrtoint ptr %end to i64
+; CHECK-NEXT: IR %0 = add i64 %end1, 1
+; CHECK-NEXT: IR %1 = sub i64 %0, %start2
+; CHECK-NEXT: EMIT vp<%min.iters.check> = icmp ult ir<%1>, ir<4>
+; CHECK-NEXT: EMIT branch-on-cond vp<%min.iters.check>
+; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, vector.ph
+; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: EMIT vp<%n.mod.vf> = urem ir<%1>, ir<4>
+; CHECK-NEXT: EMIT vp<%n.vec> = sub ir<%1>, vp<%n.mod.vf>
+; CHECK-NEXT: vp<%3> = DERIVED-IV ir<%start> + vp<%n.vec> * ir<1>
+; CHECK-NEXT: Successor(s): vector.body
+; CHECK-EMPTY:
+; CHECK-NEXT: vector.body:
+; CHECK-NEXT: EMIT-SCALAR vp<%index> = phi [ ir<0>, vector.ph ], [ vp<%index.next>, vector.body ]
+; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi ir<0>, ir<%red.next>
+; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<%index>
+; CHECK-NEXT: WIDEN ir<%l> = load vp<%next.gep>
+; CHECK-NEXT: WIDEN-CAST ir<%l.ext> = zext ir<%l> to i32
+; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%l.ext>, ir<63>
+; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + reduce.add (ir<%mul>)
+; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%index>, ir<4>
+; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%n.vec>
+; CHECK-NEXT: Successor(s): middle.block, vector.body
+; CHECK-EMPTY:
+; CHECK-NEXT: middle.block:
+; CHECK-NEXT: EMIT vp<%5> = compute-reduction-result ir<%red>, ir<%red.next>
+; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<%1>, vp<%n.vec>
+; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
+; CHECK-NEXT: Successor(s): ir-bb<exit>, ir-bb<scalar.ph>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<exit>:
+; CHECK-NEXT: IR %red.next.lcssa = phi i32 [ %red.next, %loop ] (extra operand: vp<%5> from middle.block)
+; CHECK-NEXT: No successors
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<scalar.ph>:
+; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%3>, middle.block ], [ ir<%start>, ir-bb<entry> ]
+; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<%5>, middle.block ], [ ir<0>, ir-bb<entry> ]
+; CHECK-NEXT: Successor(s): ir-bb<loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %scalar.ph ], [ %gep.iv.next, %loop ] (extra operand: vp<%bc.resume.val> from ir-bb<scalar.ph>)
+; CHECK-NEXT: IR %red = phi i32 [ 0, %scalar.ph ], [ %red.next, %loop ] (extra operand: vp<%bc.merge.rdx> from ir-bb<scalar.ph>)
+; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv, align 1
+; CHECK-NEXT: IR %l.ext = zext i8 %l to i32
+; CHECK-NEXT: IR %mul = mul i32 %l.ext, 63
+; CHECK-NEXT: IR %red.next = add i32 %red, %mul
+; CHECK-NEXT: IR %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1
+; CHECK-NEXT: IR %ec = icmp eq ptr %ptr.iv, %end
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+entry:
+ br label %loop
+
+loop:
+ %ptr.iv = phi ptr [ %start, %entry ], [ %gep.iv.next, %loop ]
+ %red = phi i32 [ 0, %entry ], [ %red.next, %loop ]
+ %l = load i8, ptr %ptr.iv, align 1
+ %l.ext = zext i8 %l to i32
+ %mul = mul i32 %l.ext, 63
+ %red.next = add i32 %red, %mul
+ %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1
+ %ec = icmp eq ptr %ptr.iv, %end
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret i32 %red.next
+}
+
+; 128 does not fit in a signed i8, so it cannot be modelled as a sext from i8 like %l.ext; the reduction expression must leave the constant un-extended.
+define i32 @print_mulacc_not_extended_const(ptr %start, ptr %end) {
+; CHECK-LABEL: 'print_mulacc_not_extended_const'
+; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
+; CHECK-NEXT: Live-in vp<%0> = VF
+; CHECK-NEXT: Live-in vp<%1> = VF * UF
+; CHECK-NEXT: Live-in vp<%2> = vector-trip-count
+; CHECK-NEXT: vp<%3> = original trip-count
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<entry>:
+; CHECK-NEXT: EMIT vp<%3> = EXPAND SCEV (1 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64))
+; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
+; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: vp<%4> = DERIVED-IV ir<%start> + vp<%2> * ir<1>
+; CHECK-NEXT: EMIT vp<%5> = reduction-start-vector ir<0>, ir<0>, ir<1>
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
+; CHECK-NEXT: <x1> vector loop: {
+; CHECK-NEXT: vector.body:
+; CHECK-NEXT: EMIT vp<%6> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
+; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi vp<%5>, vp<%9>
+; CHECK-NEXT: vp<%7> = SCALAR-STEPS vp<%6>, ir<1>, vp<%0>
+; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<%7>
+; CHECK-NEXT: vp<%8> = vector-pointer vp<%next.gep>
+; CHECK-NEXT: WIDEN ir<%l> = load vp<%8>
+; CHECK-NEXT: WIDEN-CAST ir<%l.ext> = sext ir<%l> to i32
+; CHECK-NEXT: EXPRESSION vp<%9> = ir<%red> + reduce.add (mul ir<%l.ext>, ir<128>)
+; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%6>, vp<%1>
+; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%2>
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+; CHECK-NEXT: Successor(s): middle.block
+; CHECK-EMPTY:
+; CHECK-NEXT: middle.block:
+; CHECK-NEXT: EMIT vp<%11> = compute-reduction-result ir<%red>, vp<%9>
+; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<%3>, vp<%2>
+; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
+; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<exit>:
+; CHECK-NEXT: IR %red.next.lcssa = phi i32 [ %red.next, %loop ] (extra operand: vp<%11> from middle.block)
+; CHECK-NEXT: No successors
+; CHECK-EMPTY:
+; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%4>, middle.block ], [ ir<%start>, ir-bb<entry> ]
+; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<%11>, middle.block ], [ ir<0>, ir-bb<entry> ]
+; CHECK-NEXT: Successor(s): ir-bb<loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %entry ], [ %gep.iv.next, %loop ] (extra operand: vp<%bc.resume.val> from scalar.ph)
+; CHECK-NEXT: IR %red = phi i32 [ 0, %entry ], [ %red.next, %loop ] (extra operand: vp<%bc.merge.rdx> from scalar.ph)
+; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv, align 1
+; CHECK-NEXT: IR %l.ext = sext i8 %l to i32
+; CHECK-NEXT: IR %mul = mul i32 %l.ext, 128
+; CHECK-NEXT: IR %red.next = add i32 %red, %mul
+; CHECK-NEXT: IR %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1
+; CHECK-NEXT: IR %ec = icmp eq ptr %ptr.iv, %end
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+; CHECK: VPlan 'Final VPlan for VF={4},UF={1}' {
+; CHECK-NEXT: Live-in ir<%1> = original trip-count
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<entry>:
+; CHECK-NEXT: IR %start2 = ptrtoint ptr %start to i64
+; CHECK-NEXT: IR %end1 = ptrtoint ptr %end to i64
+; CHECK-NEXT: IR %0 = add i64 %end1, 1
+; CHECK-NEXT: IR %1 = sub i64 %0, %start2
+; CHECK-NEXT: EMIT vp<%min.iters.check> = icmp ult ir<%1>, ir<4>
+; CHECK-NEXT: EMIT branch-on-cond vp<%min.iters.check>
+; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, vector.ph
+; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: EMIT vp<%n.mod.vf> = urem ir<%1>, ir<4>
+; CHECK-NEXT: EMIT vp<%n.vec> = sub ir<%1>, vp<%n.mod.vf>
+; CHECK-NEXT: vp<%3> = DERIVED-IV ir<%start> + vp<%n.vec> * ir<1>
+; CHECK-NEXT: Successor(s): vector.body
+; CHECK-EMPTY:
+; CHECK-NEXT: vector.body:
+; CHECK-NEXT: EMIT-SCALAR vp<%index> = phi [ ir<0>, vector.ph ], [ vp<%index.next>, vector.body ]
+; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi ir<0>, ir<%red.next>
+; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<%index>
+; CHECK-NEXT: WIDEN ir<%l> = load vp<%next.gep>
+; CHECK-NEXT: WIDEN-CAST ir<%l.ext> = sext ir<%l> to i32
+; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%l.ext>, ir<128>
+; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + reduce.add (ir<%mul>)
+; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%index>, ir<4>
+; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%n.vec>
+; CHECK-NEXT: Successor(s): middle.block, vector.body
+; CHECK-EMPTY:
+; CHECK-NEXT: middle.block:
+; CHECK-NEXT: EMIT vp<%5> = compute-reduction-result ir<%red>, ir<%red.next>
+; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<%1>, vp<%n.vec>
+; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
+; CHECK-NEXT: Successor(s): ir-bb<exit>, ir-bb<scalar.ph>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<exit>:
+; CHECK-NEXT: IR %red.next.lcssa = phi i32 [ %red.next, %loop ] (extra operand: vp<%5> from middle.block)
+; CHECK-NEXT: No successors
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<scalar.ph>:
+; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%3>, middle.block ], [ ir<%start>, ir-bb<entry> ]
+; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<%5>, middle.block ], [ ir<0>, ir-bb<entry> ]
+; CHECK-NEXT: Successor(s): ir-bb<loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %scalar.ph ], [ %gep.iv.next, %loop ] (extra operand: vp<%bc.resume.val> from ir-bb<scalar.ph>)
+; CHECK-NEXT: IR %red = phi i32 [ 0, %scalar.ph ], [ %red.next, %loop ] (extra operand: vp<%bc.merge.rdx> from ir-bb<scalar.ph>)
+; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv, align 1
+; CHECK-NEXT: IR %l.ext = sext i8 %l to i32
+; CHECK-NEXT: IR %mul = mul i32 %l.ext, 128
+; CHECK-NEXT: IR %red.next = add i32 %red, %mul
+; CHECK-NEXT: IR %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1
+; CHECK-NEXT: IR %ec = icmp eq ptr %ptr.iv, %end
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+entry:
+ br label %loop
+
+loop:
+ %ptr.iv = phi ptr [ %start, %entry ], [ %gep.iv.next, %loop ]
+ %red = phi i32 [ 0, %entry ], [ %red.next, %loop ]
+ %l = load i8, ptr %ptr.iv, align 1
+ %l.ext = sext i8 %l to i32
+ %mul = mul i32 %l.ext, 128
+ %red.next = add i32 %red, %mul
+ %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1
+ %ec = icmp eq ptr %ptr.iv, %end
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ %red.next.lcssa = phi i32 [ %red.next, %loop ]
+ ret i32 %red.next.lcssa
+}
+
+; 63 fits in the i8 source type of the zext, so the constant is treated as
+; zero-extended and the zext(mul(zext, 63)) chain is folded into the reduction
+; expression with both mul operands extended to i64.
+define i64 @print_ext_mulacc_extended_const(ptr %start, ptr %end) {
+; CHECK-LABEL: 'print_ext_mulacc_extended_const'
+; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
+; CHECK-NEXT: Live-in vp<%0> = VF
+; CHECK-NEXT: Live-in vp<%1> = VF * UF
+; CHECK-NEXT: Live-in vp<%2> = vector-trip-count
+; CHECK-NEXT: vp<%3> = original trip-count
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<entry>:
+; CHECK-NEXT: EMIT vp<%3> = EXPAND SCEV (1 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64))
+; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
+; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: vp<%4> = DERIVED-IV ir<%start> + vp<%2> * ir<1>
+; CHECK-NEXT: EMIT vp<%5> = reduction-start-vector ir<0>, ir<0>, ir<1>
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
+; CHECK-NEXT: <x1> vector loop: {
+; CHECK-NEXT: vector.body:
+; CHECK-NEXT: EMIT vp<%6> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
+; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi vp<%5>, vp<%9>
+; CHECK-NEXT: vp<%7> = SCALAR-STEPS vp<%6>, ir<1>, vp<%0>
+; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<%7>
+; CHECK-NEXT: vp<%8> = vector-pointer vp<%next.gep>
+; CHECK-NEXT: WIDEN ir<%l> = load vp<%8>
+; CHECK-NEXT: EXPRESSION vp<%9> = ir<%red> + reduce.add (mul (ir<%l> zext to i64), (ir<63> zext to i64))
+; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%6>, vp<%1>
+; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%2>
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+; CHECK-NEXT: Successor(s): middle.block
+; CHECK-EMPTY:
+; CHECK-NEXT: middle.block:
+; CHECK-NEXT: EMIT vp<%11> = compute-reduction-result ir<%red>, vp<%9>
+; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<%3>, vp<%2>
+; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
+; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<exit>:
+; CHECK-NEXT: IR %red.next.lcssa = phi i64 [ %red.next, %loop ] (extra operand: vp<%11> from middle.block)
+; CHECK-NEXT: No successors
+; CHECK-EMPTY:
+; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%4>, middle.block ], [ ir<%start>, ir-bb<entry> ]
+; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<%11>, middle.block ], [ ir<0>, ir-bb<entry> ]
+; CHECK-NEXT: Successor(s): ir-bb<loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %entry ], [ %gep.iv.next, %loop ] (extra operand: vp<%bc.resume.val> from scalar.ph)
+; CHECK-NEXT: IR %red = phi i64 [ 0, %entry ], [ %red.next, %loop ] (extra operand: vp<%bc.merge.rdx> from scalar.ph)
+; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv, align 1
+; CHECK-NEXT: IR %l.ext = zext i8 %l to i32
+; CHECK-NEXT: IR %mul = mul i32 %l.ext, 63
+; CHECK-NEXT: IR %mul.ext = zext i32 %mul to i64
+; CHECK-NEXT: IR %red.next = add i64 %red, %mul.ext
+; CHECK-NEXT: IR %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1
+; CHECK-NEXT: IR %ec = icmp eq ptr %ptr.iv, %end
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+; CHECK: VPlan 'Final VPlan for VF={4},UF={1}' {
+; CHECK-NEXT: Live-in ir<%1> = original trip-count
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<entry>:
+; CHECK-NEXT: IR %start2 = ptrtoint ptr %start to i64
+; CHECK-NEXT: IR %end1 = ptrtoint ptr %end to i64
+; CHECK-NEXT: IR %0 = add i64 %end1, 1
+; CHECK-NEXT: IR %1 = sub i64 %0, %start2
+; CHECK-NEXT: EMIT vp<%min.iters.check> = icmp ult ir<%1>, ir<4>
+; CHECK-NEXT: EMIT branch-on-cond vp<%min.iters.check>
+; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, vector.ph
+; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: EMIT vp<%n.mod.vf> = urem ir<%1>, ir<4>
+; CHECK-NEXT: EMIT vp<%n.vec> = sub ir<%1>, vp<%n.mod.vf>
+; CHECK-NEXT: vp<%3> = DERIVED-IV ir<%start> + vp<%n.vec> * ir<1>
+; CHECK-NEXT: Successor(s): vector.body
+; CHECK-EMPTY:
+; CHECK-NEXT: vector.body:
+; CHECK-NEXT: EMIT-SCALAR vp<%index> = phi [ ir<0>, vector.ph ], [ vp<%index.next>, vector.body ]
+; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi ir<0>, ir<%red.next>
+; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<%index>
+; CHECK-NEXT: WIDEN ir<%l> = load vp<%next.gep>
+; CHECK-NEXT: WIDEN-CAST vp<%4> = zext ir<%l> to i64
+; CHECK-NEXT: WIDEN ir<%mul> = mul vp<%4>, ir<63>
+; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + reduce.add (ir<%mul>)
+; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%index>, ir<4>
+; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%n.vec>
+; CHECK-NEXT: Successor(s): middle.block, vector.body
+; CHECK-EMPTY:
+; CHECK-NEXT: middle.block:
+; CHECK-NEXT: EMIT vp<%6> = compute-reduction-result ir<%red>, ir<%red.next>
+; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<%1>, vp<%n.vec>
+; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
+; CHECK-NEXT: Successor(s): ir-bb<exit>, ir-bb<scalar.ph>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<exit>:
+; CHECK-NEXT: IR %red.next.lcssa = phi i64 [ %red.next, %loop ] (extra operand: vp<%6> from middle.block)
+; CHECK-NEXT: No successors
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<scalar.ph>:
+; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%3>, middle.block ], [ ir<%start>, ir-bb<entry> ]
+; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<%6>, middle.block ], [ ir<0>, ir-bb<entry> ]
+; CHECK-NEXT: Successor(s): ir-bb<loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %scalar.ph ], [ %gep.iv.next, %loop ] (extra operand: vp<%bc.resume.val> from ir-bb<scalar.ph>)
+; CHECK-NEXT: IR %red = phi i64 [ 0, %scalar.ph ], [ %red.next, %loop ] (extra operand: vp<%bc.merge.rdx> from ir-bb<scalar.ph>)
+; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv, align 1
+; CHECK-NEXT: IR %l.ext = zext i8 %l to i32
+; CHECK-NEXT: IR %mul = mul i32 %l.ext, 63
+; CHECK-NEXT: IR %mul.ext = zext i32 %mul to i64
+; CHECK-NEXT: IR %red.next = add i64 %red, %mul.ext
+; CHECK-NEXT: IR %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1
+; CHECK-NEXT: IR %ec = icmp eq ptr %ptr.iv, %end
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+entry:
+ br label %loop
+
+loop:
+ %ptr.iv = phi ptr [ %start, %entry ], [ %gep.iv.next, %loop ]
+ %red = phi i64 [ 0, %entry ], [ %red.next, %loop ]
+ %l = load i8, ptr %ptr.iv, align 1
+ %l.ext = zext i8 %l to i32
+ %mul = mul i32 %l.ext, 63
+ %mul.ext = zext i32 %mul to i64
+ %red.next = add i64 %red, %mul.ext
+ %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1
+ %ec = icmp eq ptr %ptr.iv, %end
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret i64 %red.next
+}
+
+; 128 does not fit in a signed i8, so it cannot be treated as a constant
+; sign-extended from i8; the mul must stay outside the reduction expression.
+define i64 @print_ext_mulacc_not_extended_const(ptr %start, ptr %end) {
+; CHECK-LABEL: 'print_ext_mulacc_not_extended_const'
+; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
+; CHECK-NEXT: Live-in vp<%0> = VF
+; CHECK-NEXT: Live-in vp<%1> = VF * UF
+; CHECK-NEXT: Live-in vp<%2> = vector-trip-count
+; CHECK-NEXT: vp<%3> = original trip-count
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<entry>:
+; CHECK-NEXT: EMIT vp<%3> = EXPAND SCEV (1 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64))
+; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
+; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: vp<%4> = DERIVED-IV ir<%start> + vp<%2> * ir<1>
+; CHECK-NEXT: EMIT vp<%5> = reduction-start-vector ir<0>, ir<0>, ir<1>
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
+; CHECK-NEXT: <x1> vector loop: {
+; CHECK-NEXT: vector.body:
+; CHECK-NEXT: EMIT vp<%6> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
+; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi vp<%5>, vp<%9>
+; CHECK-NEXT: vp<%7> = SCALAR-STEPS vp<%6>, ir<1>, vp<%0>
+; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<%7>
+; CHECK-NEXT: vp<%8> = vector-pointer vp<%next.gep>
+; CHECK-NEXT: WIDEN ir<%l> = load vp<%8>
+; CHECK-NEXT: WIDEN-CAST ir<%l.ext> = sext ir<%l> to i32
+; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%l.ext>, ir<128>
+; CHECK-NEXT: EXPRESSION vp<%9> = ir<%red> + reduce.add (ir<%mul> sext to i64)
+; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%6>, vp<%1>
+; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%2>
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+; CHECK-NEXT: Successor(s): middle.block
+; CHECK-EMPTY:
+; CHECK-NEXT: middle.block:
+; CHECK-NEXT: EMIT vp<%11> = compute-reduction-result ir<%red>, vp<%9>
+; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<%3>, vp<%2>
+; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
+; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<exit>:
+; CHECK-NEXT: IR %red.next.lcssa = phi i64 [ %red.next, %loop ] (extra operand: vp<%11> from middle.block)
+; CHECK-NEXT: No successors
+; CHECK-EMPTY:
+; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%4>, middle.block ], [ ir<%start>, ir-bb<entry> ]
+; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<%11>, middle.block ], [ ir<0>, ir-bb<entry> ]
+; CHECK-NEXT: Successor(s): ir-bb<loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %entry ], [ %gep.iv.next, %loop ] (extra operand: vp<%bc.resume.val> from scalar.ph)
+; CHECK-NEXT: IR %red = phi i64 [ 0, %entry ], [ %red.next, %loop ] (extra operand: vp<%bc.merge.rdx> from scalar.ph)
+; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv, align 1
+; CHECK-NEXT: IR %l.ext = sext i8 %l to i32
+; CHECK-NEXT: IR %mul = mul i32 %l.ext, 128
+; CHECK-NEXT: IR %mul.ext = sext i32 %mul to i64
+; CHECK-NEXT: IR %red.next = add i64 %red, %mul.ext
+; CHECK-NEXT: IR %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1
+; CHECK-NEXT: IR %ec = icmp eq ptr %ptr.iv, %end
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+; CHECK: VPlan 'Final VPlan for VF={4},UF={1}' {
+; CHECK-NEXT: Live-in ir<%1> = original trip-count
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<entry>:
+; CHECK-NEXT: IR %start2 = ptrtoint ptr %start to i64
+; CHECK-NEXT: IR %end1 = ptrtoint ptr %end to i64
+; CHECK-NEXT: IR %0 = add i64 %end1, 1
+; CHECK-NEXT: IR %1 = sub i64 %0, %start2
+; CHECK-NEXT: EMIT vp<%min.iters.check> = icmp ult ir<%1>, ir<4>
+; CHECK-NEXT: EMIT branch-on-cond vp<%min.iters.check>
+; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, vector.ph
+; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: EMIT vp<%n.mod.vf> = urem ir<%1>, ir<4>
+; CHECK-NEXT: EMIT vp<%n.vec> = sub ir<%1>, vp<%n.mod.vf>
+; CHECK-NEXT: vp<%3> = DERIVED-IV ir<%start> + vp<%n.vec> * ir<1>
+; CHECK-NEXT: Successor(s): vector.body
+; CHECK-EMPTY:
+; CHECK-NEXT: vector.body:
+; CHECK-NEXT: EMIT-SCALAR vp<%index> = phi [ ir<0>, vector.ph ], [ vp<%index.next>, vector.body ]
+; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi ir<0>, ir<%red.next>
+; CHECK-NEXT: EMIT vp<%next.gep> = ptradd ir<%start>, vp<%index>
+; CHECK-NEXT: WIDEN ir<%l> = load vp<%next.gep>
+; CHECK-NEXT: WIDEN-CAST ir<%l.ext> = sext ir<%l> to i32
+; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%l.ext>, ir<128>
+; CHECK-NEXT: WIDEN-CAST ir<%mul.ext> = sext ir<%mul> to i64
+; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + reduce.add (ir<%mul.ext>)
+; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%index>, ir<4>
+; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%n.vec>
+; CHECK-NEXT: Successor(s): middle.block, vector.body
+; CHECK-EMPTY:
+; CHECK-NEXT: middle.block:
+; CHECK-NEXT: EMIT vp<%5> = compute-reduction-result ir<%red>, ir<%red.next>
+; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<%1>, vp<%n.vec>
+; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
+; CHECK-NEXT: Successor(s): ir-bb<exit>, ir-bb<scalar.ph>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<exit>:
+; CHECK-NEXT: IR %red.next.lcssa = phi i64 [ %red.next, %loop ] (extra operand: vp<%5> from middle.block)
+; CHECK-NEXT: No successors
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<scalar.ph>:
+; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%3>, middle.block ], [ ir<%start>, ir-bb<entry> ]
+; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<%5>, middle.block ], [ ir<0>, ir-bb<entry> ]
+; CHECK-NEXT: Successor(s): ir-bb<loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<loop>:
+; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %scalar.ph ], [ %gep.iv.next, %loop ] (extra operand: vp<%bc.resume.val> from ir-bb<scalar.ph>)
+; CHECK-NEXT: IR %red = phi i64 [ 0, %scalar.ph ], [ %red.next, %loop ] (extra operand: vp<%bc.merge.rdx> from ir-bb<scalar.ph>)
+; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv, align 1
+; CHECK-NEXT: IR %l.ext = sext i8 %l to i32
+; CHECK-NEXT: IR %mul = mul i32 %l.ext, 128
+; CHECK-NEXT: IR %mul.ext = sext i32 %mul to i64
+; CHECK-NEXT: IR %red.next = add i64 %red, %mul.ext
+; CHECK-NEXT: IR %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1
+; CHECK-NEXT: IR %ec = icmp eq ptr %ptr.iv, %end
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+entry:
+ br label %loop
+
+loop:
+ %ptr.iv = phi ptr [ %start, %entry ], [ %gep.iv.next, %loop ]
+ %red = phi i64 [ 0, %entry ], [ %red.next, %loop ]
+ %l = load i8, ptr %ptr.iv, align 1
+ %l.ext = sext i8 %l to i32
+ %mul = mul i32 %l.ext, 128
+ %mul.ext = sext i32 %mul to i64
+ %red.next = add i64 %red, %mul.ext
+ %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1
+ %ec = icmp eq ptr %ptr.iv, %end
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ %red.next.lcssa = phi i64 [ %red.next, %loop ]
+ ret i64 %red.next.lcssa
+}
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/switch_case.ll b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/switch_case.ll
new file mode 100644
index 0000000..a804225
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/switch_case.ll
@@ -0,0 +1,54 @@
+; RUN: opt < %s -S | FileCheck %s
+
+; Test whether the UTC format the switch-cases correctly, which requires TWO extra spaces.
+
+define i8 @testi8(i8 %x) {
+ switch i8 %x, label %default [
+ i8 0, label %case1
+ i8 1, label %case2
+ i8 2, label %case3
+ i8 3, label %case3
+ ]
+default:
+ ret i8 0
+case1:
+ ret i8 1
+case2:
+ ret i8 2
+case3:
+ ret i8 3
+}
+
+define i32 @testi32(i32 %x) {
+ switch i32 %x, label %default [
+ i32 0, label %case1
+ i32 1, label %case2
+ i32 2, label %case3
+ i32 3, label %case3
+ ]
+default:
+ ret i32 0
+case1:
+ ret i32 1
+case2:
+ ret i32 2
+case3:
+ ret i32 3
+}
+
+define i128 @testi128(i128 %x) {
+ switch i128 %x, label %default [
+ i128 0, label %case1
+ i128 1, label %case2
+ i128 2, label %case3
+ i128 3, label %case3
+ ]
+default:
+ ret i128 0
+case1:
+ ret i128 1
+case2:
+ ret i128 2
+case3:
+ ret i128 3
+}
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/switch_case.ll.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/switch_case.ll.expected
new file mode 100644
index 0000000..b1977e7
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/switch_case.ll.expected
@@ -0,0 +1,106 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 7
+; RUN: opt < %s -S | FileCheck %s
+
+; Test whether the UTC format the switch-cases correctly, which requires TWO extra spaces.
+
+define i8 @testi8(i8 %x) {
+; CHECK-LABEL: define i8 @testi8(
+; CHECK-SAME: i8 [[X:%.*]]) {
+; CHECK-NEXT: switch i8 [[X]], label %[[DEFAULT:.*]] [
+; CHECK-NEXT: i8 0, label %[[CASE1:.*]]
+; CHECK-NEXT: i8 1, label %[[CASE2:.*]]
+; CHECK-NEXT: i8 2, label %[[CASE3:.*]]
+; CHECK-NEXT: i8 3, label %[[CASE3]]
+; CHECK-NEXT: ]
+; CHECK: [[DEFAULT]]:
+; CHECK-NEXT: ret i8 0
+; CHECK: [[CASE1]]:
+; CHECK-NEXT: ret i8 1
+; CHECK: [[CASE2]]:
+; CHECK-NEXT: ret i8 2
+; CHECK: [[CASE3]]:
+; CHECK-NEXT: ret i8 3
+;
+ switch i8 %x, label %default [
+ i8 0, label %case1
+ i8 1, label %case2
+ i8 2, label %case3
+ i8 3, label %case3
+ ]
+default:
+ ret i8 0
+case1:
+ ret i8 1
+case2:
+ ret i8 2
+case3:
+ ret i8 3
+}
+
+define i32 @testi32(i32 %x) {
+; CHECK-LABEL: define i32 @testi32(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: switch i32 [[X]], label %[[DEFAULT:.*]] [
+; CHECK-NEXT: i32 0, label %[[CASE1:.*]]
+; CHECK-NEXT: i32 1, label %[[CASE2:.*]]
+; CHECK-NEXT: i32 2, label %[[CASE3:.*]]
+; CHECK-NEXT: i32 3, label %[[CASE3]]
+; CHECK-NEXT: ]
+; CHECK: [[DEFAULT]]:
+; CHECK-NEXT: ret i32 0
+; CHECK: [[CASE1]]:
+; CHECK-NEXT: ret i32 1
+; CHECK: [[CASE2]]:
+; CHECK-NEXT: ret i32 2
+; CHECK: [[CASE3]]:
+; CHECK-NEXT: ret i32 3
+;
+ switch i32 %x, label %default [
+ i32 0, label %case1
+ i32 1, label %case2
+ i32 2, label %case3
+ i32 3, label %case3
+ ]
+default:
+ ret i32 0
+case1:
+ ret i32 1
+case2:
+ ret i32 2
+case3:
+ ret i32 3
+}
+
+define i128 @testi128(i128 %x) {
+; CHECK-LABEL: define i128 @testi128(
+; CHECK-SAME: i128 [[X:%.*]]) {
+; CHECK-NEXT: switch i128 [[X]], label %[[DEFAULT:.*]] [
+; CHECK-NEXT: i128 0, label %[[CASE1:.*]]
+; CHECK-NEXT: i128 1, label %[[CASE2:.*]]
+; CHECK-NEXT: i128 2, label %[[CASE3:.*]]
+; CHECK-NEXT: i128 3, label %[[CASE3]]
+; CHECK-NEXT: ]
+; CHECK: [[DEFAULT]]:
+; CHECK-NEXT: ret i128 0
+; CHECK: [[CASE1]]:
+; CHECK-NEXT: ret i128 1
+; CHECK: [[CASE2]]:
+; CHECK-NEXT: ret i128 2
+; CHECK: [[CASE3]]:
+; CHECK-NEXT: ret i128 3
+;
+ switch i128 %x, label %default [
+ i128 0, label %case1
+ i128 1, label %case2
+ i128 2, label %case3
+ i128 3, label %case3
+ ]
+default:
+ ret i128 0
+case1:
+ ret i128 1
+case2:
+ ret i128 2
+case3:
+ ret i128 3
+}
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/switch_case.test b/llvm/test/tools/UpdateTestChecks/update_test_checks/switch_case.test
new file mode 100644
index 0000000..891dbe0
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/switch_case.test
@@ -0,0 +1,3 @@
+## switch_case test checking that update_test_checks.py works correctly
+# RUN: cp -f %S/Inputs/switch_case.ll %t.ll && %update_test_checks %t.ll --version 7
+# RUN: diff -u %t.ll %S/Inputs/switch_case.ll.expected
diff --git a/llvm/test/tools/dxil-dis/llvm_assume.ll b/llvm/test/tools/dxil-dis/llvm_assume.ll
new file mode 100644
index 0000000..f5be66c
--- /dev/null
+++ b/llvm/test/tools/dxil-dis/llvm_assume.ll
@@ -0,0 +1,11 @@
+; RUN: llc --filetype=obj %s -o - | dxil-dis -o - | FileCheck %s
+
+target triple = "dxil-pc-shadermodel6.7-library"
+
+; The llvm.assume call is expected to appear unchanged in the dxil-dis output
+; after llc has written the module as a DXIL object.
+define void @test_llvm_assume(i1 %0) {
+; CHECK-LABEL: test_llvm_assume
+; CHECK-NEXT: tail call void @llvm.assume(i1 %0)
+tail call void @llvm.assume(i1 %0)
+ret void
+}
+
diff --git a/llvm/test/tools/llvm-dwarfdump/X86/type_units_split_dwp_v4.s b/llvm/test/tools/llvm-dwarfdump/X86/type_units_split_dwp_v4.s
new file mode 100644
index 0000000..519edf04
--- /dev/null
+++ b/llvm/test/tools/llvm-dwarfdump/X86/type_units_split_dwp_v4.s
@@ -0,0 +1,251 @@
+## This test uses TU index for type parsing in dwp and makes sure the DWARF4 type is
+## successfully retrieved.
+
+## cd to a unique dir so we can refer to the file as just "test.dwo" in the
+## assembly test input below.
+# RUN: rm -rf %t
+# RUN: mkdir %t
+# RUN: cd %t
+
+# RUN: llvm-mc %s --split-dwarf-file=test.dwo -filetype obj -triple x86_64 -o test.o
+# RUN: llvm-dwp -e test.o -o test.dwp
+# RUN: llvm-dwarfdump test.dwp | FileCheck %s
+
+# Generated from:
+#
+# struct t1 { };
+# t1 v1;
+#
+# $ clang++ -S -g -fdebug-types-section -gsplit-dwarf -o test.4.split.dwp.s -gdwarf-4
+
+# CHECK: DW_TAG_variable
+# CHECK: DW_AT_type ({{.*}} "t1")
+ .file "test.cpp"
+ .section .debug_types.dwo,"e",@progbits
+ .long .Ldebug_info_dwo_end0-.Ldebug_info_dwo_start0 # Length of Unit
+.Ldebug_info_dwo_start0:
+ .short 4 # DWARF version number
+ .long 0 # Offset Into Abbrev. Section
+ .byte 8 # Address Size (in bytes)
+ .quad -4149699470930386446 # Type Signature
+ .long 30 # Type DIE Offset
+ .byte 1 # Abbrev [1] 0x17:0xe DW_TAG_type_unit
+ .short 33 # DW_AT_language
+ .long 0 # DW_AT_stmt_list
+ .byte 2 # Abbrev [2] 0x1e:0x6 DW_TAG_structure_type
+ .byte 5 # DW_AT_calling_convention
+ .byte 1 # DW_AT_name
+ .byte 1 # DW_AT_byte_size
+ .byte 1 # DW_AT_decl_file
+ .byte 1 # DW_AT_decl_line
+ .byte 0 # End Of Children Mark
+.Ldebug_info_dwo_end0:
+ .file 1 "." "test.cpp"
+ .type v1,@object # @v1
+ .bss
+ .globl v1
+v1:
+ .zero 1
+ .size v1, 1
+
+ .section .debug_abbrev,"",@progbits
+ .byte 1 # Abbreviation Code
+ .byte 17 # DW_TAG_compile_unit
+ .byte 0 # DW_CHILDREN_no
+ .byte 16 # DW_AT_stmt_list
+ .byte 23 # DW_FORM_sec_offset
+ .byte 27 # DW_AT_comp_dir
+ .byte 14 # DW_FORM_strp
+ .ascii "\264B" # DW_AT_GNU_pubnames
+ .byte 25 # DW_FORM_flag_present
+ .ascii "\260B" # DW_AT_GNU_dwo_name
+ .byte 14 # DW_FORM_strp
+ .ascii "\261B" # DW_AT_GNU_dwo_id
+ .byte 7 # DW_FORM_data8
+ .ascii "\263B" # DW_AT_GNU_addr_base
+ .byte 23 # DW_FORM_sec_offset
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 0 # EOM(3)
+ .section .debug_info,"",@progbits
+.Lcu_begin0:
+ .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+ .short 4 # DWARF version number
+ .long .debug_abbrev # Offset Into Abbrev. Section
+ .byte 8 # Address Size (in bytes)
+ .byte 1 # Abbrev [1] 0xb:0x19 DW_TAG_compile_unit
+ .long .Lline_table_start0 # DW_AT_stmt_list
+ .long .Lskel_string0 # DW_AT_comp_dir
+ # DW_AT_GNU_pubnames
+ .long .Lskel_string1 # DW_AT_GNU_dwo_name
+ .quad 1388839634901268525 # DW_AT_GNU_dwo_id
+ .long .Laddr_table_base0 # DW_AT_GNU_addr_base
+.Ldebug_info_end0:
+ .section .debug_str,"MS",@progbits,1
+.Lskel_string0:
+ .asciz "." # string offset=0
+.Lskel_string1:
+ .asciz "test.dwo" # string offset=2
+ .section .debug_str.dwo,"eMS",@progbits,1
+.Linfo_string0:
+ .asciz "v1" # string offset=0
+.Linfo_string1:
+ .asciz "t1" # string offset=3
+.Linfo_string2:
+ .asciz "clang version 22.0.0" # string offset=6
+.Linfo_string3:
+ .asciz "test.cpp" # string offset=27
+.Linfo_string4:
+ .asciz "test.dwo" # string offset=36
+ .section .debug_str_offsets.dwo,"e",@progbits
+ .long 0
+ .long 3
+ .long 6
+ .long 27
+ .long 36
+ .section .debug_info.dwo,"e",@progbits
+ .long .Ldebug_info_dwo_end1-.Ldebug_info_dwo_start1 # Length of Unit
+.Ldebug_info_dwo_start1:
+ .short 4 # DWARF version number
+ .long 0 # Offset Into Abbrev. Section
+ .byte 8 # Address Size (in bytes)
+ .byte 3 # Abbrev [3] 0xb:0x23 DW_TAG_compile_unit
+ .byte 2 # DW_AT_producer
+ .short 33 # DW_AT_language
+ .byte 3 # DW_AT_name
+ .byte 4 # DW_AT_GNU_dwo_name
+ .quad 1388839634901268525 # DW_AT_GNU_dwo_id
+ .byte 4 # Abbrev [4] 0x19:0xb DW_TAG_variable
+ .byte 0 # DW_AT_name
+ .long 36 # DW_AT_type
+ # DW_AT_external
+ .byte 1 # DW_AT_decl_file
+ .byte 2 # DW_AT_decl_line
+ .byte 2 # DW_AT_location
+ .byte 251
+ .byte 0
+ .byte 5 # Abbrev [5] 0x24:0x9 DW_TAG_structure_type
+ # DW_AT_declaration
+ .quad -4149699470930386446 # DW_AT_signature
+ .byte 0 # End Of Children Mark
+.Ldebug_info_dwo_end1:
+ .section .debug_abbrev.dwo,"e",@progbits
+ .byte 1 # Abbreviation Code
+ .byte 65 # DW_TAG_type_unit
+ .byte 1 # DW_CHILDREN_yes
+ .byte 19 # DW_AT_language
+ .byte 5 # DW_FORM_data2
+ .byte 16 # DW_AT_stmt_list
+ .byte 23 # DW_FORM_sec_offset
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 2 # Abbreviation Code
+ .byte 19 # DW_TAG_structure_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 54 # DW_AT_calling_convention
+ .byte 11 # DW_FORM_data1
+ .byte 3 # DW_AT_name
+ .ascii "\202>" # DW_FORM_GNU_str_index
+ .byte 11 # DW_AT_byte_size
+ .byte 11 # DW_FORM_data1
+ .byte 58 # DW_AT_decl_file
+ .byte 11 # DW_FORM_data1
+ .byte 59 # DW_AT_decl_line
+ .byte 11 # DW_FORM_data1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 3 # Abbreviation Code
+ .byte 17 # DW_TAG_compile_unit
+ .byte 1 # DW_CHILDREN_yes
+ .byte 37 # DW_AT_producer
+ .ascii "\202>" # DW_FORM_GNU_str_index
+ .byte 19 # DW_AT_language
+ .byte 5 # DW_FORM_data2
+ .byte 3 # DW_AT_name
+ .ascii "\202>" # DW_FORM_GNU_str_index
+ .ascii "\260B" # DW_AT_GNU_dwo_name
+ .ascii "\202>" # DW_FORM_GNU_str_index
+ .ascii "\261B" # DW_AT_GNU_dwo_id
+ .byte 7 # DW_FORM_data8
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 4 # Abbreviation Code
+ .byte 52 # DW_TAG_variable
+ .byte 0 # DW_CHILDREN_no
+ .byte 3 # DW_AT_name
+ .ascii "\202>" # DW_FORM_GNU_str_index
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 63 # DW_AT_external
+ .byte 25 # DW_FORM_flag_present
+ .byte 58 # DW_AT_decl_file
+ .byte 11 # DW_FORM_data1
+ .byte 59 # DW_AT_decl_line
+ .byte 11 # DW_FORM_data1
+ .byte 2 # DW_AT_location
+ .byte 24 # DW_FORM_exprloc
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 5 # Abbreviation Code
+ .byte 19 # DW_TAG_structure_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 60 # DW_AT_declaration
+ .byte 25 # DW_FORM_flag_present
+ .byte 105 # DW_AT_signature
+ .byte 32 # DW_FORM_ref_sig8
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 0 # EOM(3)
+ .section .debug_line.dwo,"e",@progbits
+.Ltmp0:
+ .long .Ldebug_line_end0-.Ldebug_line_start0 # unit length
+.Ldebug_line_start0:
+ .short 4
+ .long .Lprologue_end0-.Lprologue_start0
+.Lprologue_start0:
+ .byte 1
+ .byte 1
+ .byte 1
+ .byte -5
+ .byte 14
+ .byte 1
+ .byte 0
+ .ascii "test.cpp"
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+.Lprologue_end0:
+.Ldebug_line_end0:
+ .section .debug_addr,"",@progbits
+.Laddr_table_base0:
+ .quad v1
+ .section .debug_gnu_pubnames,"",@progbits
+ .long .LpubNames_end0-.LpubNames_start0 # Length of Public Names Info
+.LpubNames_start0:
+ .short 2 # DWARF Version
+ .long .Lcu_begin0 # Offset of Compilation Unit Info
+ .long 36 # Compilation Unit Length
+ .long 25 # DIE offset
+ .byte 32 # Attributes: VARIABLE, EXTERNAL
+ .asciz "v1" # External Name
+ .long 0 # End Mark
+.LpubNames_end0:
+ .section .debug_gnu_pubtypes,"",@progbits
+ .long .LpubTypes_end0-.LpubTypes_start0 # Length of Public Types Info
+.LpubTypes_start0:
+ .short 2 # DWARF Version
+ .long .Lcu_begin0 # Offset of Compilation Unit Info
+ .long 36 # Compilation Unit Length
+ .long 36 # DIE offset
+ .byte 16 # Attributes: TYPE, EXTERNAL
+ .asciz "t1" # External Name
+ .long 0 # End Mark
+.LpubTypes_end0:
+ .ident "clang version 22.0.0"
+ .section ".note.GNU-stack","",@progbits
+ .addrsig
+ .section .debug_line,"",@progbits
+.Lline_table_start0:
diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/X4-sve-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/X4-sve-instructions.s
index 19fba62..747507f 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Cortex/X4-sve-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/X4-sve-instructions.s
@@ -1,7 +1,7 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=aarch64 -mcpu=cortex-x4 -instruction-tables < %s | FileCheck %s
-# Check the Neoverse V2 model is used.
+# Check the Neoverse V3 model is used.
addhnb z0.b, z1.h, z31.h
@@ -17,31 +17,36 @@ addhnb z0.b, z1.h, z31.h
# CHECK-NEXT: 1 2 0.25 addhnb z0.b, z1.h, z31.h
# CHECK: Resources:
-# CHECK-NEXT: [0.0] - V2UnitB
-# CHECK-NEXT: [0.1] - V2UnitB
-# CHECK-NEXT: [1.0] - V2UnitD
-# CHECK-NEXT: [1.1] - V2UnitD
-# CHECK-NEXT: [2.0] - V2UnitFlg
-# CHECK-NEXT: [2.1] - V2UnitFlg
-# CHECK-NEXT: [2.2] - V2UnitFlg
-# CHECK-NEXT: [3] - V2UnitL2
-# CHECK-NEXT: [4.0] - V2UnitL01
-# CHECK-NEXT: [4.1] - V2UnitL01
-# CHECK-NEXT: [5] - V2UnitM0
-# CHECK-NEXT: [6] - V2UnitM1
-# CHECK-NEXT: [7] - V2UnitS0
-# CHECK-NEXT: [8] - V2UnitS1
-# CHECK-NEXT: [9] - V2UnitS2
-# CHECK-NEXT: [10] - V2UnitS3
-# CHECK-NEXT: [11] - V2UnitV0
-# CHECK-NEXT: [12] - V2UnitV1
-# CHECK-NEXT: [13] - V2UnitV2
-# CHECK-NEXT: [14] - V2UnitV3
+# CHECK-NEXT: [0.0] - V3UnitB
+# CHECK-NEXT: [0.1] - V3UnitB
+# CHECK-NEXT: [0.2] - V3UnitB
+# CHECK-NEXT: [1.0] - V3UnitD
+# CHECK-NEXT: [1.1] - V3UnitD
+# CHECK-NEXT: [2.0] - V3UnitFlg
+# CHECK-NEXT: [2.1] - V3UnitFlg
+# CHECK-NEXT: [2.2] - V3UnitFlg
+# CHECK-NEXT: [2.3] - V3UnitFlg
+# CHECK-NEXT: [3.0] - V3UnitL12
+# CHECK-NEXT: [3.1] - V3UnitL12
+# CHECK-NEXT: [4] - V3UnitLS0
+# CHECK-NEXT: [5] - V3UnitM0
+# CHECK-NEXT: [6] - V3UnitM1
+# CHECK-NEXT: [7] - V3UnitS0
+# CHECK-NEXT: [8] - V3UnitS1
+# CHECK-NEXT: [9] - V3UnitS2
+# CHECK-NEXT: [10] - V3UnitS3
+# CHECK-NEXT: [11] - V3UnitS4
+# CHECK-NEXT: [12] - V3UnitS5
+# CHECK-NEXT: [13] - V3UnitST1
+# CHECK-NEXT: [14] - V3UnitV0
+# CHECK-NEXT: [15] - V3UnitV1
+# CHECK-NEXT: [16] - V3UnitV2
+# CHECK-NEXT: [17] - V3UnitV3
# CHECK: Resource pressure per iteration:
-# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2.0] [2.1] [2.2] [3] [4.0] [4.1] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14]
-# CHECK-NEXT: - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] [17]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25
# CHECK: Resource pressure by instruction:
-# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2.0] [2.1] [2.2] [3] [4.0] [4.1] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] Instructions:
-# CHECK-NEXT: - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 addhnb z0.b, z1.h, z31.h
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] [17] Instructions:
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 addhnb z0.b, z1.h, z31.h
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-basic-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-basic-instructions.s
new file mode 100644
index 0000000..73fd95d6
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-basic-instructions.s
@@ -0,0 +1,3779 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=neoverse-v3 -instruction-tables < %s | FileCheck %s
+
+#------------------------------------------------------------------------------
+# Add/sub (immediate)
+#------------------------------------------------------------------------------
+
+add w2, w3, #4095
+add w30, w29, #1, lsl #12
+add w13, w5, #4095, lsl #12
+add x5, x7, #1638
+add w20, wsp, #801
+add wsp, wsp, #1104
+add wsp, w30, #4084
+add x0, x24, #291
+add x3, x24, #4095, lsl #12
+add x8, sp, #1074
+add sp, x29, #3816
+sub w0, wsp, #4077
+sub w4, w20, #546, lsl #12
+sub sp, sp, #288
+sub wsp, w19, #16
+adds w13, w23, #291, lsl #12
+cmn w2, #4095
+adds w20, wsp, #0
+cmn x3, #1, lsl #12
+cmp sp, #20, lsl #12
+cmp x30, #4095
+subs x4, sp, #3822
+cmn w3, #291, lsl #12
+cmn wsp, #1365
+cmn sp, #1092, lsl #12
+mov sp, x30
+mov wsp, w20
+mov x11, sp
+mov w24, wsp
+
+#------------------------------------------------------------------------------
+# Add-subtract (shifted register)
+#------------------------------------------------------------------------------
+
+add w3, w5, w7
+add wzr, w3, w5
+add w20, wzr, w4
+add w4, w6, wzr
+add w11, w13, w15
+add w9, w3, wzr, lsl #10
+add w17, w29, w20, lsl #31
+add w21, w22, w23, lsr #0
+add w24, w25, w26, lsr #18
+add w27, w28, w29, lsr #31
+add w2, w3, w4, asr #0
+add w5, w6, w7, asr #21
+add w8, w9, w10, asr #31
+add x3, x5, x7
+add xzr, x3, x5
+add x20, xzr, x4
+add x4, x6, xzr
+add x11, x13, x15
+add x9, x3, xzr, lsl #10
+add x17, x29, x20, lsl #63
+add x21, x22, x23, lsr #0
+add x24, x25, x26, lsr #18
+add x27, x28, x29, lsr #63
+add x2, x3, x4, asr #0
+add x5, x6, x7, asr #21
+add x8, x9, x10, asr #63
+adds w3, w5, w7
+cmn w3, w5
+adds w20, wzr, w4
+adds w4, w6, wzr
+adds w11, w13, w15
+adds w9, w3, wzr, lsl #10
+adds w17, w29, w20, lsl #31
+adds w21, w22, w23, lsr #0
+adds w24, w25, w26, lsr #18
+adds w27, w28, w29, lsr #31
+adds w2, w3, w4, asr #0
+adds w5, w6, w7, asr #21
+adds w8, w9, w10, asr #31
+adds x3, x5, x7
+cmn x3, x5
+adds x20, xzr, x4
+adds x4, x6, xzr
+adds x11, x13, x15
+adds x9, x3, xzr, lsl #10
+adds x17, x29, x20, lsl #63
+adds x21, x22, x23, lsr #0
+adds x24, x25, x26, lsr #18
+adds x27, x28, x29, lsr #63
+adds x2, x3, x4, asr #0
+adds x5, x6, x7, asr #21
+adds x8, x9, x10, asr #63
+sub w3, w5, w7
+sub wzr, w3, w5
+sub w4, w6, wzr
+sub w11, w13, w15
+sub w9, w3, wzr, lsl #10
+sub w17, w29, w20, lsl #31
+sub w21, w22, w23, lsr #0
+sub w24, w25, w26, lsr #18
+sub w27, w28, w29, lsr #31
+sub w2, w3, w4, asr #0
+sub w5, w6, w7, asr #21
+sub w8, w9, w10, asr #31
+sub x3, x5, x7
+sub xzr, x3, x5
+sub x4, x6, xzr
+sub x11, x13, x15
+sub x9, x3, xzr, lsl #10
+sub x17, x29, x20, lsl #63
+sub x21, x22, x23, lsr #0
+sub x24, x25, x26, lsr #18
+sub x27, x28, x29, lsr #63
+sub x2, x3, x4, asr #0
+sub x5, x6, x7, asr #21
+sub x8, x9, x10, asr #63
+subs w3, w5, w7
+cmp w3, w5
+subs w4, w6, wzr
+subs w11, w13, w15
+subs w9, w3, wzr, lsl #10
+subs w17, w29, w20, lsl #31
+subs w21, w22, w23, lsr #0
+subs w24, w25, w26, lsr #18
+subs w27, w28, w29, lsr #31
+subs w2, w3, w4, asr #0
+subs w5, w6, w7, asr #21
+subs w8, w9, w10, asr #31
+subs x3, x5, x7
+cmp x3, x5
+subs x4, x6, xzr
+subs x11, x13, x15
+subs x9, x3, xzr, lsl #10
+subs x17, x29, x20, lsl #63
+subs x21, x22, x23, lsr #0
+subs x24, x25, x26, lsr #18
+subs x27, x28, x29, lsr #63
+subs x2, x3, x4, asr #0
+subs x5, x6, x7, asr #21
+subs x8, x9, x10, asr #63
+cmn wzr, w4
+cmn w5, wzr
+cmn w6, w7
+cmn w8, w9, lsl #15
+cmn w10, w11, lsl #31
+cmn w12, w13, lsr #0
+cmn w14, w15, lsr #21
+cmn w16, w17, lsr #31
+cmn w18, w19, asr #0
+cmn w20, w21, asr #22
+cmn w22, w23, asr #31
+cmn x0, x3
+cmn xzr, x4
+cmn x5, xzr
+cmn x6, x7
+cmn x8, x9, lsl #15
+cmn x10, x11, lsl #63
+cmn x12, x13, lsr #0
+cmn x14, x15, lsr #41
+cmn x16, x17, lsr #63
+cmn x18, x19, asr #0
+cmn x20, x21, asr #55
+cmn x22, x23, asr #63
+cmp w0, w3
+cmp wzr, w4
+cmp w5, wzr
+cmp w6, w7
+cmp w8, w9, lsl #15
+cmp w10, w11, lsl #31
+cmp w12, w13, lsr #0
+cmp w14, w15, lsr #21
+cmp w18, w19, asr #0
+cmp w20, w21, asr #22
+cmp w22, w23, asr #31
+cmp x0, x3
+cmp xzr, x4
+cmp x5, xzr
+cmp x6, x7
+cmp x8, x9, lsl #15
+cmp x10, x11, lsl #63
+cmp x12, x13, lsr #0
+cmp x14, x15, lsr #41
+cmp x16, x17, lsr #63
+cmp x18, x19, asr #0
+cmp x20, x21, asr #55
+cmp x22, x23, asr #63
+cmp wzr, w0
+cmp xzr, x0
+
+#------------------------------------------------------------------------------
+# Add-subtract (with carry)
+#------------------------------------------------------------------------------
+
+adc w29, w27, w25
+adc wzr, w3, w4
+adc w9, wzr, w10
+adc w20, w0, wzr
+adc x29, x27, x25
+adc xzr, x3, x4
+adc x9, xzr, x10
+adc x20, x0, xzr
+adcs w29, w27, w25
+adcs wzr, w3, w4
+adcs w9, wzr, w10
+adcs w20, w0, wzr
+adcs x29, x27, x25
+adcs xzr, x3, x4
+adcs x9, xzr, x10
+adcs x20, x0, xzr
+sbc w29, w27, w25
+sbc wzr, w3, w4
+ngc w9, w10
+sbc w20, w0, wzr
+sbc x29, x27, x25
+sbc xzr, x3, x4
+ngc x9, x10
+sbc x20, x0, xzr
+sbcs w29, w27, w25
+sbcs wzr, w3, w4
+ngcs w9, w10
+sbcs w20, w0, wzr
+sbcs x29, x27, x25
+sbcs xzr, x3, x4
+ngcs x9, x10
+sbcs x20, x0, xzr
+ngc w3, w12
+ngc wzr, w9
+ngc w23, wzr
+ngc x29, x30
+ngc xzr, x0
+ngc x0, xzr
+ngcs w3, w12
+ngcs wzr, w9
+ngcs w23, wzr
+ngcs x29, x30
+ngcs xzr, x0
+ngcs x0, xzr
+
+#------------------------------------------------------------------------------
+# Bitfield
+#------------------------------------------------------------------------------
+
+sbfx x1, x2, #3, #2
+asr x3, x4, #63
+asr wzr, wzr, #31
+sbfx w12, w9, #0, #1
+ubfiz x4, x5, #52, #11
+ubfx xzr, x4, #0, #1
+ubfiz x4, xzr, #1, #6
+lsr x5, x6, #12
+bfi x4, x5, #52, #11
+bfxil xzr, x4, #0, #1
+bfi x4, xzr, #1, #6
+bfxil x5, x6, #12, #52
+sxtb w1, w2
+sxtb xzr, w3
+sxth w9, w10
+sxth x0, w1
+sxtw x3, w30
+uxtb w1, w2
+uxth w9, w10
+ubfx x3, x30, #0, #32
+asr w3, w2, #0
+asr w9, w10, #31
+asr x20, x21, #63
+asr w1, wzr, #3
+lsr w3, w2, #0
+lsr w9, w10, #31
+lsr x20, x21, #63
+lsr wzr, wzr, #3
+lsr w3, w2, #0
+lsl w9, w10, #31
+lsl x20, x21, #63
+lsl w1, wzr, #3
+sbfx w9, w10, #0, #1
+sbfiz x2, x3, #63, #1
+asr x19, x20, #0
+sbfiz x9, x10, #5, #59
+asr w9, w10, #0
+sbfiz w11, w12, #31, #1
+sbfiz w13, w14, #29, #3
+sbfiz xzr, xzr, #10, #11
+sbfx w9, w10, #0, #1
+asr x2, x3, #63
+asr x19, x20, #0
+asr x9, x10, #5
+asr w9, w10, #0
+asr w11, w12, #31
+asr w13, w14, #29
+sbfx xzr, xzr, #10, #11
+bfxil w9, w10, #0, #1
+bfi x2, x3, #63, #1
+bfxil x19, x20, #0, #64
+bfi x9, x10, #5, #59
+bfxil w9, w10, #0, #32
+bfi w11, w12, #31, #1
+bfi w13, w14, #29, #3
+bfi xzr, xzr, #10, #11
+bfxil w9, w10, #0, #1
+bfxil x2, x3, #63, #1
+bfxil x19, x20, #0, #64
+bfxil x9, x10, #5, #59
+bfxil w9, w10, #0, #32
+bfxil w11, w12, #31, #1
+bfxil w13, w14, #29, #3
+bfxil xzr, xzr, #10, #11
+ubfx w9, w10, #0, #1
+lsl x2, x3, #63
+lsr x19, x20, #0
+lsl x9, x10, #5
+lsr w9, w10, #0
+lsl w11, w12, #31
+lsl w13, w14, #29
+ubfiz xzr, xzr, #10, #11
+ubfx w9, w10, #0, #1
+lsr x2, x3, #63
+lsr x19, x20, #0
+lsr x9, x10, #5
+lsr w9, w10, #0
+lsr w11, w12, #31
+lsr w13, w14, #29
+ubfx xzr, xzr, #10, #11
+
+#------------------------------------------------------------------------------
+# Compare and branch (immediate)
+#------------------------------------------------------------------------------
+
+cbz w5, #4
+cbz x5, #0
+cbnz x2, #-4
+cbnz x26, #1048572
+cbz wzr, #0
+cbnz xzr, #0
+
+#------------------------------------------------------------------------------
+# Conditional branch (immediate)
+#------------------------------------------------------------------------------
+
+b.ne #4
+b.ge #1048572
+b.ge #-4
+
+#------------------------------------------------------------------------------
+# Conditional compare (immediate)
+#------------------------------------------------------------------------------
+
+ccmp w1, #31, #0, eq
+ccmp w3, #0, #15, hs
+ccmp wzr, #15, #13, hs
+ccmp x9, #31, #0, le
+ccmp x3, #0, #15, gt
+ccmp xzr, #5, #7, ne
+ccmn w1, #31, #0, eq
+ccmn w3, #0, #15, hs
+ccmn wzr, #15, #13, hs
+ccmn x9, #31, #0, le
+ccmn x3, #0, #15, gt
+ccmn xzr, #5, #7, ne
+
+#------------------------------------------------------------------------------
+# Conditional compare (register)
+#------------------------------------------------------------------------------
+
+ccmp w1, wzr, #0, eq
+ccmp w3, w0, #15, hs
+ccmp wzr, w15, #13, hs
+ccmp x9, xzr, #0, le
+ccmp x3, x0, #15, gt
+ccmp xzr, x5, #7, ne
+ccmn w1, wzr, #0, eq
+ccmn w3, w0, #15, hs
+ccmn wzr, w15, #13, hs
+ccmn x9, xzr, #0, le
+ccmn x3, x0, #15, gt
+ccmn xzr, x5, #7, ne
+
+#------------------------------------------------------------------------------
+# Conditional select
+#------------------------------------------------------------------------------
+
+csel w1, w0, w19, ne
+csel wzr, w5, w9, eq
+csel w9, wzr, w30, gt
+csel w1, w28, wzr, mi
+csel x19, x23, x29, lt
+csel xzr, x3, x4, ge
+csel x5, xzr, x6, hs
+csel x7, x8, xzr, lo
+csinc w1, w0, w19, ne
+csinc wzr, w5, w9, eq
+csinc w9, wzr, w30, gt
+csinc w1, w28, wzr, mi
+csinc x19, x23, x29, lt
+csinc xzr, x3, x4, ge
+csinc x5, xzr, x6, hs
+csinc x7, x8, xzr, lo
+csinv w1, w0, w19, ne
+csinv wzr, w5, w9, eq
+csinv w9, wzr, w30, gt
+csinv w1, w28, wzr, mi
+csinv x19, x23, x29, lt
+csinv xzr, x3, x4, ge
+csinv x5, xzr, x6, hs
+csinv x7, x8, xzr, lo
+csneg w1, w0, w19, ne
+csneg wzr, w5, w9, eq
+csneg w9, wzr, w30, gt
+csneg w1, w28, wzr, mi
+csneg x19, x23, x29, lt
+csneg xzr, x3, x4, ge
+csneg x5, xzr, x6, hs
+csneg x7, x8, xzr, lo
+cset w3, eq
+cset x9, pl
+csetm w20, ne
+csetm x30, ge
+csinc w2, wzr, wzr, al
+csinv x3, xzr, xzr, nv
+cinc w3, w5, gt
+cinc wzr, w4, le
+cset w9, lt
+cinc x3, x5, gt
+cinc xzr, x4, le
+cset x9, lt
+csinc w5, w6, w6, nv
+csinc x1, x2, x2, al
+cinv w3, w5, gt
+cinv wzr, w4, le
+csetm w9, lt
+cinv x3, x5, gt
+cinv xzr, x4, le
+csetm x9, lt
+csinv x1, x0, x0, al
+csinv w9, w8, w8, nv
+cneg w3, w5, gt
+cneg wzr, w4, le
+cneg w9, wzr, lt
+cneg x3, x5, gt
+cneg xzr, x4, le
+cneg x9, xzr, lt
+csneg x4, x8, x8, al
+csinv w9, w8, w8, nv
+
+#------------------------------------------------------------------------------
+# Data-processing (1 source)
+#------------------------------------------------------------------------------
+
+rbit w0, w7
+rbit x18, x3
+rev16 w17, w1
+rev16 x5, x2
+rev w18, w0
+rev32 x20, x1
+rev x22, x2
+clz w24, w3
+clz x26, x4
+cls w3, w5
+cls x20, x5
+
+#------------------------------------------------------------------------------
+# Data-processing (2 source)
+#------------------------------------------------------------------------------
+
+udiv w0, w7, w10
+udiv x9, x22, x4
+sdiv w12, w21, w0
+sdiv x13, x2, x1
+lsl w11, w12, w13
+lsl x14, x15, x16
+lsr w17, w18, w19
+lsr x20, x21, x22
+asr w23, w24, w25
+asr x26, x27, x28
+ror w0, w1, w2
+ror x3, x4, x5
+lsl w6, w7, w8
+lsl x9, x10, x11
+lsr w12, w13, w14
+lsr x15, x16, x17
+asr w18, w19, w20
+asr x21, x22, x23
+ror w24, w25, w26
+ror x27, x28, x29
+
+#------------------------------------------------------------------------------
+# Data-processing (3 sources)
+#------------------------------------------------------------------------------
+
+smulh x30, x29, x28
+smulh xzr, x27, x26
+umulh x30, x29, x28
+umulh x23, x30, xzr
+madd w1, w3, w7, w4
+madd wzr, w0, w9, w11
+madd w13, wzr, w4, w4
+madd w19, w30, wzr, w29
+mul w4, w5, w6
+madd x1, x3, x7, x4
+madd xzr, x0, x9, x11
+madd x13, xzr, x4, x4
+madd x19, x30, xzr, x29
+mul x4, x5, x6
+msub w1, w3, w7, w4
+msub wzr, w0, w9, w11
+msub w13, wzr, w4, w4
+msub w19, w30, wzr, w29
+mneg w4, w5, w6
+msub x1, x3, x7, x4
+msub xzr, x0, x9, x11
+msub x13, xzr, x4, x4
+msub x19, x30, xzr, x29
+mneg x4, x5, x6
+smaddl x3, w5, w2, x9
+smaddl xzr, w10, w11, x12
+smaddl x13, wzr, w14, x15
+smaddl x16, w17, wzr, x18
+smull x19, w20, w21
+smsubl x3, w5, w2, x9
+smsubl xzr, w10, w11, x12
+smsubl x13, wzr, w14, x15
+smsubl x16, w17, wzr, x18
+smnegl x19, w20, w21
+umaddl x3, w5, w2, x9
+umaddl xzr, w10, w11, x12
+umaddl x13, wzr, w14, x15
+umaddl x16, w17, wzr, x18
+umull x19, w20, w21
+umsubl x3, w5, w2, x9
+umsubl x16, w17, wzr, x18
+umnegl x19, w20, w21
+smulh x30, x29, x28
+smulh x23, x22, xzr
+umulh x23, x22, xzr
+mul x19, x20, xzr
+mneg w21, w22, w23
+smull x11, w13, w17
+umull x11, w13, w17
+smnegl x11, w13, w17
+umnegl x11, w13, w17
+
+#------------------------------------------------------------------------------
+# Extract (immediate)
+#------------------------------------------------------------------------------
+
+extr w3, w5, w7, #0
+extr w11, w13, w17, #31
+extr x3, x5, x7, #15
+extr x11, x13, x17, #63
+ror x19, x23, #24
+ror x29, xzr, #63
+ror w9, w13, #31
+
+#------------------------------------------------------------------------------
+# Floating-point compare
+#------------------------------------------------------------------------------
+
+fcmp s3, s5
+fcmp s31, #0.0
+fcmp s31, #0.0
+fcmpe s29, s30
+fcmpe s15, #0.0
+fcmpe s15, #0.0
+fcmp d4, d12
+fcmp d23, #0.0
+fcmp d23, #0.0
+fcmpe d26, d22
+fcmpe d29, #0.0
+fcmpe d29, #0.0
+
+#------------------------------------------------------------------------------
+# Floating-point conditional compare
+#------------------------------------------------------------------------------
+
+fccmp s1, s31, #0, eq
+fccmp s3, s0, #15, hs
+fccmp s31, s15, #13, hs
+fccmp d9, d31, #0, le
+fccmp d3, d0, #15, gt
+fccmp d31, d5, #7, ne
+fccmpe s1, s31, #0, eq
+fccmpe s3, s0, #15, hs
+fccmpe s31, s15, #13, hs
+fccmpe d9, d31, #0, le
+fccmpe d3, d0, #15, gt
+fccmpe d31, d5, #7, ne
+
+#-------------------------------------------------------------------------------
+# Floating-point conditional select
+#-------------------------------------------------------------------------------
+
+fcsel s3, s20, s9, pl
+fcsel d9, d10, d11, mi
+
+#------------------------------------------------------------------------------
+# Floating-point data-processing (1 source)
+#------------------------------------------------------------------------------
+
+fmov s0, s1
+fabs s2, s3
+fneg s4, s5
+fsqrt s6, s7
+fcvt d8, s9
+fcvt h10, s11
+frintn s12, s13
+frintp s14, s15
+frintm s16, s17
+frintz s18, s19
+frinta s20, s21
+frintx s22, s23
+frinti s24, s25
+fmov d0, d1
+fabs d2, d3
+fneg d4, d5
+fsqrt d6, d7
+fcvt s8, d9
+fcvt h10, d11
+frintn d12, d13
+frintp d14, d15
+frintm d16, d17
+frintz d18, d19
+frinta d20, d21
+frintx d22, d23
+frinti d24, d25
+fcvt s26, h27
+fcvt d28, h29
+
+#------------------------------------------------------------------------------
+# Floating-point data-processing (2 sources)
+#------------------------------------------------------------------------------
+
+fmul s20, s19, s17
+fdiv s1, s2, s3
+fadd s4, s5, s6
+fsub s7, s8, s9
+fmax s10, s11, s12
+fmin s13, s14, s15
+fmaxnm s16, s17, s18
+fminnm s19, s20, s21
+fnmul s22, s23, s2
+fmul d20, d19, d17
+fdiv d1, d2, d3
+fadd d4, d5, d6
+fsub d7, d8, d9
+fmax d10, d11, d12
+fmin d13, d14, d15
+fmaxnm d16, d17, d18
+fminnm d19, d20, d21
+fnmul d22, d23, d24
+
+#------------------------------------------------------------------------------
+# Floating-point data-processing (3 sources)
+#------------------------------------------------------------------------------
+
+fmadd s3, s5, s6, s31
+fmadd d3, d13, d0, d23
+fmsub s3, s5, s6, s31
+fmsub d3, d13, d0, d23
+fnmadd s3, s5, s6, s31
+fnmadd d3, d13, d0, d23
+fnmsub s3, s5, s6, s31
+fnmsub d3, d13, d0, d23
+
+#------------------------------------------------------------------------------
+# Floating-point <-> fixed-point conversion
+#------------------------------------------------------------------------------
+
+fcvtzs w3, h5, #1
+fcvtzs wzr, h20, #13
+fcvtzs w19, h0, #32
+fcvtzs x3, h5, #1
+fcvtzs x12, h30, #45
+fcvtzs x19, h0, #64
+fcvtzs w3, s5, #1
+fcvtzs wzr, s20, #13
+fcvtzs w19, s0, #32
+fcvtzs x3, s5, #1
+fcvtzs x12, s30, #45
+fcvtzs x19, s0, #64
+fcvtzs w3, d5, #1
+fcvtzs wzr, d20, #13
+fcvtzs w19, d0, #32
+fcvtzs x3, d5, #1
+fcvtzs x12, d30, #45
+fcvtzs x19, d0, #64
+fcvtzu w3, h5, #1
+fcvtzu wzr, h20, #13
+fcvtzu w19, h0, #32
+fcvtzu x3, h5, #1
+fcvtzu x12, h30, #45
+fcvtzu x19, h0, #64
+fcvtzu w3, s5, #1
+fcvtzu wzr, s20, #13
+fcvtzu w19, s0, #32
+fcvtzu x3, s5, #1
+fcvtzu x12, s30, #45
+fcvtzu x19, s0, #64
+fcvtzu w3, d5, #1
+fcvtzu wzr, d20, #13
+fcvtzu w19, d0, #32
+fcvtzu x3, d5, #1
+fcvtzu x12, d30, #45
+fcvtzu x19, d0, #64
+scvtf h23, w19, #1
+scvtf h31, wzr, #20
+scvtf h14, w0, #32
+scvtf h23, x19, #1
+scvtf h31, xzr, #20
+scvtf h14, x0, #64
+scvtf s23, w19, #1
+scvtf s31, wzr, #20
+scvtf s14, w0, #32
+scvtf s23, x19, #1
+scvtf s31, xzr, #20
+scvtf s14, x0, #64
+scvtf d23, w19, #1
+scvtf d31, wzr, #20
+scvtf d14, w0, #32
+scvtf d23, x19, #1
+scvtf d31, xzr, #20
+scvtf d14, x0, #64
+ucvtf h23, w19, #1
+ucvtf h31, wzr, #20
+ucvtf h14, w0, #32
+ucvtf h23, x19, #1
+ucvtf h31, xzr, #20
+ucvtf h14, x0, #64
+ucvtf s23, w19, #1
+ucvtf s31, wzr, #20
+ucvtf s14, w0, #32
+ucvtf s23, x19, #1
+ucvtf s31, xzr, #20
+ucvtf s14, x0, #64
+ucvtf d23, w19, #1
+ucvtf d31, wzr, #20
+ucvtf d14, w0, #32
+ucvtf d23, x19, #1
+ucvtf d31, xzr, #20
+ucvtf d14, x0, #64
+
+#------------------------------------------------------------------------------
+# Floating-point <-> integer conversion
+#------------------------------------------------------------------------------
+
+fcvtns w3, h31
+fcvtns xzr, h12
+fcvtnu wzr, h12
+fcvtnu x0, h0
+fcvtps wzr, h9
+fcvtps x12, h20
+fcvtpu w30, h23
+fcvtpu x29, h3
+fcvtms w2, h3
+fcvtms x4, h5
+fcvtmu w6, h7
+fcvtmu x8, h9
+fcvtzs w10, h11
+fcvtzs x12, h13
+fcvtzu w14, h15
+fcvtzu x15, h16
+scvtf h17, w18
+scvtf h19, x20
+ucvtf h21, w22
+scvtf h23, x24
+fcvtas w25, h26
+fcvtas x27, h28
+fcvtau w29, h30
+fcvtau xzr, h0
+fcvtns w3, s31
+fcvtns xzr, s12
+fcvtnu wzr, s12
+fcvtnu x0, s0
+fcvtps wzr, s9
+fcvtps x12, s20
+fcvtpu w30, s23
+fcvtpu x29, s3
+fcvtms w2, s3
+fcvtms x4, s5
+fcvtmu w6, s7
+fcvtmu x8, s9
+fcvtzs w10, s11
+fcvtzs x12, s13
+fcvtzu w14, s15
+fcvtzu x15, s16
+scvtf s17, w18
+scvtf s19, x20
+ucvtf s21, w22
+scvtf s23, x24
+fcvtas w25, s26
+fcvtas x27, s28
+fcvtau w29, s30
+fcvtau xzr, s0
+fcvtns w3, d31
+fcvtns xzr, d12
+fcvtnu wzr, d12
+fcvtnu x0, d0
+fcvtps wzr, d9
+fcvtps x12, d20
+fcvtpu w30, d23
+fcvtpu x29, d3
+fcvtms w2, d3
+fcvtms x4, d5
+fcvtmu w6, d7
+fcvtmu x8, d9
+fcvtzs w10, d11
+fcvtzs x12, d13
+fcvtzu w14, d15
+fcvtzu x15, d16
+scvtf d17, w18
+scvtf d19, x20
+ucvtf d21, w22
+ucvtf d23, x24
+fcvtas w25, d26
+fcvtas x27, d28
+fcvtau w29, d30
+fcvtau xzr, d0
+fmov w3, s9
+fmov s9, w3
+fmov x20, d31
+fmov d1, x15
+fmov x3, v12.d[1]
+fmov v1.d[1], x19
+
+#------------------------------------------------------------------------------
+# Floating-point immediate
+#------------------------------------------------------------------------------
+
+fmov s2, #0.12500000
+fmov s3, #1.00000000
+fmov d30, #16.00000000
+fmov s4, #1.06250000
+fmov d10, #1.93750000
+fmov s12, #-1.00000000
+fmov d16, #8.50000000
+
+#------------------------------------------------------------------------------
+# Load-register (literal)
+#------------------------------------------------------------------------------
+
+ldr w3, #0
+ldr x29, #4
+ldrsw xzr, #-4
+ldr s0, #8
+ldr d0, #1048572
+ldr q0, #-1048576
+prfm pldl1strm, #0
+prfm #22, #0
+
+#------------------------------------------------------------------------------
+# Load/store exclusive
+#------------------------------------------------------------------------------
+
+stxrb w18, w8, [sp]
+stxrh w24, w15, [x16]
+stxr w5, w6, [x17]
+stxr w1, x10, [x21]
+ldxrb w30, [x0]
+ldxrh w17, [x4]
+ldxr w22, [sp]
+ldxr x11, [x29]
+ldxr x11, [x29]
+ldxr x11, [x29]
+stxp w12, w11, w10, [sp]
+stxp wzr, x27, x9, [x12]
+ldxp w0, wzr, [sp]
+ldxp x17, x0, [x18]
+ldxp x17, x0, [x18]
+stlxrb w12, w22, [x0]
+stlxrh w10, w1, [x1]
+stlxr w9, w2, [x2]
+stlxr w9, x3, [sp]
+ldaxrb w8, [x4]
+ldaxrh w7, [x5]
+ldaxr w6, [sp]
+ldaxr x5, [x6]
+ldaxr x5, [x6]
+ldaxr x5, [x6]
+stlxp w4, w5, w6, [sp]
+stlxp wzr, x6, x7, [x1]
+ldaxp w5, w18, [sp]
+ldaxp x6, x19, [x22]
+ldaxp x6, x19, [x22]
+stlrb w24, [sp]
+stlrh w25, [x30]
+stlr w26, [x29]
+stlr x27, [x28]
+stlr x27, [x28]
+stlr x27, [x28]
+ldarb w23, [sp]
+ldarh w22, [x30]
+ldar wzr, [x29]
+ldar x21, [x28]
+ldar x21, [x28]
+ldar x21, [x28]
+
+#------------------------------------------------------------------------------
+# Load/store (unscaled immediate)
+#------------------------------------------------------------------------------
+
+sturb w9, [sp]
+sturh wzr, [x12, #255]
+stur w16, [x0, #-256]
+stur x28, [x14, #1]
+ldurb w1, [x20, #255]
+ldurh w20, [x1, #255]
+ldur w12, [sp, #255]
+ldur xzr, [x12, #255]
+ldursb x9, [x7, #-256]
+ldursh x17, [x19, #-256]
+ldursw x20, [x15, #-256]
+prfum pldl2keep, [sp, #-256]
+ldursb w19, [x1, #-256]
+ldursh w15, [x21, #-256]
+stur b0, [sp, #1]
+stur h12, [x12, #-1]
+stur s15, [x0, #255]
+stur d31, [x5, #25]
+stur q9, [x5]
+ldur b3, [sp]
+ldur h5, [x4, #-256]
+ldur s7, [x12, #-1]
+ldur d11, [x19, #4]
+ldur q13, [x1, #2]
+
+#------------------------------------------------------------------------------
+# Load/store (immediate post-indexed)
+#------------------------------------------------------------------------------
+
+strb w9, [x2], #255
+strb w10, [x3], #1
+strb w10, [x3], #-256
+strh w9, [x2], #255
+strh w9, [x2], #1
+strh w10, [x3], #-256
+str w19, [sp], #255
+str w20, [x30], #1
+str w21, [x12], #-256
+str xzr, [x9], #255
+str x2, [x3], #1
+str x19, [x12], #-256
+ldrb w9, [x2], #255
+ldrb w10, [x3], #1
+ldrb w10, [x3], #-256
+ldrh w9, [x2], #255
+ldrh w9, [x2], #1
+ldrh w10, [x3], #-256
+ldr w19, [sp], #255
+ldr w20, [x30], #1
+ldr w21, [x12], #-256
+ldr xzr, [x9], #255
+ldr x2, [x3], #1
+ldr x19, [x12], #-256
+ldrsb xzr, [x9], #255
+ldrsb x2, [x3], #1
+ldrsb x19, [x12], #-256
+ldrsh xzr, [x9], #255
+ldrsh x2, [x3], #1
+ldrsh x19, [x12], #-256
+ldrsw xzr, [x9], #255
+ldrsw x2, [x3], #1
+ldrsw x19, [x12], #-256
+ldrsb wzr, [x9], #255
+ldrsb w2, [x3], #1
+ldrsb w19, [x12], #-256
+ldrsh wzr, [x9], #255
+ldrsh w2, [x3], #1
+ldrsh w19, [x12], #-256
+str b0, [x0], #255
+str b3, [x3], #1
+str b5, [sp], #-256
+str h10, [x10], #255
+str h13, [x23], #1
+str h15, [sp], #-256
+str s20, [x20], #255
+str s23, [x23], #1
+str s25, [x0], #-256
+str d20, [x20], #255
+str d23, [x23], #1
+str d25, [x0], #-256
+ldr b0, [x0], #255
+ldr b3, [x3], #1
+ldr b5, [sp], #-256
+ldr h10, [x10], #255
+ldr h13, [x23], #1
+ldr h15, [sp], #-256
+ldr s20, [x20], #255
+ldr s23, [x23], #1
+ldr s25, [x0], #-256
+ldr d20, [x20], #255
+ldr d23, [x23], #1
+ldr d25, [x0], #-256
+ldr q20, [x1], #255
+ldr q23, [x9], #1
+ldr q25, [x20], #-256
+str q10, [x1], #255
+str q22, [sp], #1
+str q21, [x20], #-256
+
+#-------------------------------------------------------------------------------
+# Load-store register (immediate pre-indexed)
+#-------------------------------------------------------------------------------
+
+ldr x3, [x4, #0]!
+strb w9, [x2, #255]!
+strb w10, [x3, #1]!
+strb w10, [x3, #-256]!
+strh w9, [x2, #255]!
+strh w9, [x2, #1]!
+strh w10, [x3, #-256]!
+str w19, [sp, #255]!
+str w20, [x30, #1]!
+str w21, [x12, #-256]!
+str xzr, [x9, #255]!
+str x2, [x3, #1]!
+str x19, [x12, #-256]!
+ldrb w9, [x2, #255]!
+ldrb w10, [x3, #1]!
+ldrb w10, [x3, #-256]!
+ldrh w9, [x2, #255]!
+ldrh w9, [x2, #1]!
+ldrh w10, [x3, #-256]!
+ldr w19, [sp, #255]!
+ldr w20, [x30, #1]!
+ldr w21, [x12, #-256]!
+ldr xzr, [x9, #255]!
+ldr x2, [x3, #1]!
+ldr x19, [x12, #-256]!
+ldrsb xzr, [x9, #255]!
+ldrsb x2, [x3, #1]!
+ldrsb x19, [x12, #-256]!
+ldrsh xzr, [x9, #255]!
+ldrsh x2, [x3, #1]!
+ldrsh x19, [x12, #-256]!
+ldrsw xzr, [x9, #255]!
+ldrsw x2, [x3, #1]!
+ldrsw x19, [x12, #-256]!
+ldrsb wzr, [x9, #255]!
+ldrsb w2, [x3, #1]!
+ldrsb w19, [x12, #-256]!
+ldrsh wzr, [x9, #255]!
+ldrsh w2, [x3, #1]!
+ldrsh w19, [x12, #-256]!
+str b0, [x0, #255]!
+str b3, [x3, #1]!
+str b5, [sp, #-256]!
+str h10, [x10, #255]!
+str h13, [x23, #1]!
+str h15, [sp, #-256]!
+str s20, [x20, #255]!
+str s23, [x23, #1]!
+str s25, [x0, #-256]!
+str d20, [x20, #255]!
+str d23, [x23, #1]!
+str d25, [x0, #-256]!
+ldr b0, [x0, #255]!
+ldr b3, [x3, #1]!
+ldr b5, [sp, #-256]!
+ldr h10, [x10, #255]!
+ldr h13, [x23, #1]!
+ldr h15, [sp, #-256]!
+ldr s20, [x20, #255]!
+ldr s23, [x23, #1]!
+ldr s25, [x0, #-256]!
+ldr d20, [x20, #255]!
+ldr d23, [x23, #1]!
+ldr d25, [x0, #-256]!
+ldr q20, [x1, #255]!
+ldr q23, [x9, #1]!
+ldr q25, [x20, #-256]!
+str q10, [x1, #255]!
+str q22, [sp, #1]!
+str q21, [x20, #-256]!
+
+#------------------------------------------------------------------------------
+# Load/store (unprivileged)
+#------------------------------------------------------------------------------
+
+sttrb w9, [sp]
+sttrh wzr, [x12, #255]
+sttr w16, [x0, #-256]
+sttr x28, [x14, #1]
+ldtrb w1, [x20, #255]
+ldtrh w20, [x1, #255]
+ldtr w12, [sp, #255]
+ldtr xzr, [x12, #255]
+ldtrsb x9, [x7, #-256]
+ldtrsh x17, [x19, #-256]
+ldtrsw x20, [x15, #-256]
+ldtrsb w19, [x1, #-256]
+ldtrsh w15, [x21, #-256]
+
+#------------------------------------------------------------------------------
+# Load/store (unsigned immediate)
+#------------------------------------------------------------------------------
+
+ldr x4, [x29]
+ldr x30, [x12, #32760]
+ldr x20, [sp, #8]
+ldr xzr, [sp]
+ldr w2, [sp]
+ldr w17, [sp, #16380]
+ldr w13, [x2, #4]
+ldrsw x2, [x5, #4]
+ldrsw x23, [sp, #16380]
+ldrh w2, [x4]
+ldrsh w23, [x6, #8190]
+ldrsh wzr, [sp, #2]
+ldrsh x29, [x2, #2]
+ldrb w26, [x3, #121]
+ldrb w12, [x2]
+ldrsb w27, [sp, #4095]
+ldrsb xzr, [x15]
+str x30, [sp]
+str w20, [x4, #16380]
+strh w17, [sp, #8190]
+strb w23, [x3, #4095]
+strb wzr, [x2]
+ldr b31, [sp, #4095]
+ldr h20, [x2, #8190]
+ldr s10, [x19, #16380]
+ldr d3, [x10, #32760]
+str q12, [sp, #65520]
+
+#------------------------------------------------------------------------------
+# Load/store (register offset)
+#------------------------------------------------------------------------------
+
+ldr h3, [sp, x5]
+ldr h9, [x27, x6]
+ldr h10, [x30, x7, lsl #1]
+str h11, [x29, x3, sxtx]
+str h12, [x28, xzr, sxtx]
+str h13, [x27, x5, sxtx #1]
+ldr h14, [x26, w6, uxtw]
+ldr h15, [x25, w7, uxtw]
+ldr h16, [x24, w8, uxtw #1]
+ldr h17, [x23, w9, sxtw]
+str h18, [x22, w10, sxtw]
+ldr h19, [x21, wzr, sxtw #1]
+ldrb w3, [sp, x5]
+ldrb w9, [x27, x6]
+ldrsb w10, [x30, x7]
+ldrb w11, [x29, x3, sxtx]
+strb w12, [x28, xzr, sxtx]
+ldrb w14, [x26, w6, uxtw]
+ldrsb w15, [x25, w7, uxtw]
+ldrb w17, [x23, w9, sxtw]
+ldrsb x18, [x22, w10, sxtw]
+ldrsh w3, [sp, x5]
+ldrsh w9, [x27, x6]
+ldrh w10, [x30, x7, lsl #1]
+strh w11, [x29, x3, sxtx]
+ldrh w12, [x28, xzr, sxtx]
+ldrsh x13, [x27, x5, sxtx #1]
+ldrh w14, [x26, w6, uxtw]
+ldrh w15, [x25, w7, uxtw]
+ldrsh w16, [x24, w8, uxtw #1]
+ldrh w17, [x23, w9, sxtw]
+ldrh w18, [x22, w10, sxtw]
+strh w19, [x21, wzr, sxtw #1]
+ldr w3, [sp, x5]
+ldr s9, [x27, x6]
+ldr w10, [x30, x7, lsl #2]
+ldr w11, [x29, x3, sxtx]
+str s12, [x28, xzr, sxtx]
+str w13, [x27, x5, sxtx #2]
+str w14, [x26, w6, uxtw]
+ldr w15, [x25, w7, uxtw]
+ldr w16, [x24, w8, uxtw #2]
+ldrsw x17, [x23, w9, sxtw]
+ldr w18, [x22, w10, sxtw]
+ldrsw x19, [x21, wzr, sxtw #2]
+ldr x3, [sp, x5]
+str x9, [x27, x6]
+ldr d10, [x30, x7, lsl #3]
+str x11, [x29, x3, sxtx]
+ldr x12, [x28, xzr, sxtx]
+ldr x13, [x27, x5, sxtx #3]
+prfm pldl1keep, [x26, w6, uxtw]
+ldr x15, [x25, w7, uxtw]
+ldr x16, [x24, w8, uxtw #3]
+ldr x17, [x23, w9, sxtw]
+ldr x18, [x22, w10, sxtw]
+str d19, [x21, wzr, sxtw #3]
+ldr q3, [sp, x5]
+ldr q9, [x27, x6]
+ldr q10, [x30, x7, lsl #4]
+str q11, [x29, x3, sxtx]
+str q12, [x28, xzr, sxtx]
+str q13, [x27, x5, sxtx #4]
+ldr q14, [x26, w6, uxtw]
+ldr q15, [x25, w7, uxtw]
+ldr q16, [x24, w8, uxtw #4]
+ldr q17, [x23, w9, sxtw]
+str q18, [x22, w10, sxtw]
+ldr q19, [x21, wzr, sxtw #4]
+
+#------------------------------------------------------------------------------
+# Load/store register pair (offset)
+#------------------------------------------------------------------------------
+
+ldp w3, w5, [sp]
+stp wzr, w9, [sp, #252]
+ldp w2, wzr, [sp, #-256]
+ldp w9, w10, [sp, #4]
+ldpsw x9, x10, [sp, #4]
+ldpsw x9, x10, [x2, #-256]
+ldpsw x20, x30, [sp, #252]
+ldp x21, x29, [x2, #504]
+ldp x22, x23, [x3, #-512]
+ldp x24, x25, [x4, #8]
+ldp s29, s28, [sp, #252]
+stp s27, s26, [sp, #-256]
+ldp s1, s2, [x3, #44]
+stp d3, d5, [x9, #504]
+stp d7, d11, [x10, #-512]
+ldp d2, d3, [x30, #-8]
+stp q3, q5, [sp]
+stp q17, q19, [sp, #1008]
+ldp q23, q29, [x1, #-1024]
+
+#------------------------------------------------------------------------------
+# Load/store register pair (post-indexed)
+#------------------------------------------------------------------------------
+
+ldp w3, w5, [sp], #0
+stp wzr, w9, [sp], #252
+ldp w2, wzr, [sp], #-256
+ldp w9, w10, [sp], #4
+ldpsw x9, x10, [sp], #4
+ldpsw x9, x10, [x2], #-256
+ldpsw x20, x30, [sp], #252
+ldp x21, x29, [x2], #504
+ldp x22, x23, [x3], #-512
+ldp x24, x25, [x4], #8
+ldp s29, s28, [sp], #252
+stp s27, s26, [sp], #-256
+ldp s1, s2, [x3], #44
+stp d3, d5, [x9], #504
+stp d7, d11, [x10], #-512
+ldp d2, d3, [x30], #-8
+stp q3, q5, [sp], #0
+stp q17, q19, [sp], #1008
+ldp q23, q29, [x1], #-1024
+
+#------------------------------------------------------------------------------
+# Load/store register pair (pre-indexed)
+#------------------------------------------------------------------------------
+
+ldp w3, w5, [sp, #0]!
+stp wzr, w9, [sp, #252]!
+ldp w2, wzr, [sp, #-256]!
+ldp w9, w10, [sp, #4]!
+ldpsw x9, x10, [sp, #4]!
+ldpsw x9, x10, [x2, #-256]!
+ldpsw x20, x30, [sp, #252]!
+ldp x21, x29, [x2, #504]!
+ldp x22, x23, [x3, #-512]!
+ldp x24, x25, [x4, #8]!
+ldp s29, s28, [sp, #252]!
+stp s27, s26, [sp, #-256]!
+ldp s1, s2, [x3, #44]!
+stp d3, d5, [x9, #504]!
+stp d7, d11, [x10, #-512]!
+ldp d2, d3, [x30, #-8]!
+stp q3, q5, [sp, #0]!
+stp q17, q19, [sp, #1008]!
+ldp q23, q29, [x1, #-1024]!
+
+#------------------------------------------------------------------------------
+# Load/store non-temporal register pair (offset)
+#------------------------------------------------------------------------------
+
+ldnp w3, w5, [sp]
+stnp wzr, w9, [sp, #252]
+ldnp w2, wzr, [sp, #-256]
+ldnp w9, w10, [sp, #4]
+ldnp x21, x29, [x2, #504]
+ldnp x22, x23, [x3, #-512]
+ldnp x24, x25, [x4, #8]
+ldnp s29, s28, [sp, #252]
+stnp s27, s26, [sp, #-256]
+ldnp s1, s2, [x3, #44]
+stnp d3, d5, [x9, #504]
+stnp d7, d11, [x10, #-512]
+ldnp d2, d3, [x30, #-8]
+stnp q3, q5, [sp]
+stnp q17, q19, [sp, #1008]
+ldnp q23, q29, [x1, #-1024]
+
+#------------------------------------------------------------------------------
+# Logical (immediate)
+#------------------------------------------------------------------------------
+
+mov w3, #983055
+mov x10, #-6148914691236517206
+ands w4, w4, #983055
+ands x11, x11, #-6148914691236517206
+
+#------------------------------------------------------------------------------
+# Logical (shifted register)
+#------------------------------------------------------------------------------
+
+and w12, w23, w21
+and w16, w15, w1, lsl #1
+and w9, w4, w10, lsl #31
+and w3, w30, w11
+and x3, x5, x7, lsl #63
+and x5, x14, x19, asr #4
+and w3, w17, w19, ror #31
+and w0, w2, wzr, lsr #17
+and w3, w30, w11, asr #2
+and xzr, x4, x26
+and w3, wzr, w20, ror #2
+and x7, x20, xzr, asr #63
+bic x13, x20, x14, lsl #47
+bic w2, w7, w9
+orr w2, w7, w0, asr #31
+orr x8, x9, x10, lsl #12
+orn x3, x5, x7, asr #2
+orn w2, w5, w29
+ands w7, wzr, w9, lsl #1
+ands x3, x5, x20, ror #63
+bics w3, w5, w7
+bics x3, xzr, x3, lsl #1
+tst w3, w7, lsl #31
+tst x2, x20, asr #2
+mov x3, x6
+mov x3, xzr
+mov wzr, w2
+mov w3, w5
+
+#------------------------------------------------------------------------------
+# Move wide (immediate)
+#------------------------------------------------------------------------------
+
+movz w2, #0, lsl #16
+mov w2, #-1235
+mov x2, #5299989643264
+mov x2, #0
+movk w3, #0
+movz x4, #0, lsl #16
+movk w5, #0, lsl #16
+movz x6, #0, lsl #32
+movk x7, #0, lsl #32
+movz x8, #0, lsl #48
+movk x9, #0, lsl #48
+
+#------------------------------------------------------------------------------
+# PC-relative addressing
+#------------------------------------------------------------------------------
+
+adr x2, #1600
+adrp x21, #6553600
+adr x0, #262144
+
+#------------------------------------------------------------------------------
+# Test and branch (immediate)
+#------------------------------------------------------------------------------
+
+tbz x12, #62, #0
+tbz x12, #62, #4
+tbz x12, #62, #-32768
+tbnz x12, #60, #32764
+
+#------------------------------------------------------------------------------
+# Unconditional branch (immediate)
+#------------------------------------------------------------------------------
+
+b #4
+b #-4
+b #134217724
+
+#------------------------------------------------------------------------------
+# Unconditional branch (register)
+#------------------------------------------------------------------------------
+
+br x20
+blr xzr
+ret x10
+ret
+eret
+drps
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.13 add w2, w3, #4095
+# CHECK-NEXT: 1 1 0.13 add w30, w29, #1, lsl #12
+# CHECK-NEXT: 1 1 0.13 add w13, w5, #4095, lsl #12
+# CHECK-NEXT: 1 1 0.13 add x5, x7, #1638
+# CHECK-NEXT: 1 1 0.13 add w20, wsp, #801
+# CHECK-NEXT: 1 1 0.13 add wsp, wsp, #1104
+# CHECK-NEXT: 1 1 0.13 add wsp, w30, #4084
+# CHECK-NEXT: 1 1 0.13 add x0, x24, #291
+# CHECK-NEXT: 1 1 0.13 add x3, x24, #4095, lsl #12
+# CHECK-NEXT: 1 1 0.13 add x8, sp, #1074
+# CHECK-NEXT: 1 1 0.13 add sp, x29, #3816
+# CHECK-NEXT: 1 1 0.13 sub w0, wsp, #4077
+# CHECK-NEXT: 1 1 0.13 sub w4, w20, #546, lsl #12
+# CHECK-NEXT: 1 1 0.13 sub sp, sp, #288
+# CHECK-NEXT: 1 1 0.13 sub wsp, w19, #16
+# CHECK-NEXT: 1 1 0.25 adds w13, w23, #291, lsl #12
+# CHECK-NEXT: 1 1 0.25 cmn w2, #4095
+# CHECK-NEXT: 1 1 0.25 adds w20, wsp, #0
+# CHECK-NEXT: 1 1 0.25 cmn x3, #1, lsl #12
+# CHECK-NEXT: 1 1 0.25 cmp sp, #20, lsl #12
+# CHECK-NEXT: 1 1 0.25 cmp x30, #4095
+# CHECK-NEXT: 1 1 0.25 subs x4, sp, #3822
+# CHECK-NEXT: 1 1 0.25 cmn w3, #291, lsl #12
+# CHECK-NEXT: 1 1 0.25 cmn wsp, #1365
+# CHECK-NEXT: 1 1 0.25 cmn sp, #1092, lsl #12
+# CHECK-NEXT: 1 1 0.13 mov sp, x30
+# CHECK-NEXT: 1 1 0.13 mov wsp, w20
+# CHECK-NEXT: 1 1 0.13 mov x11, sp
+# CHECK-NEXT: 1 1 0.13 mov w24, wsp
+# CHECK-NEXT: 1 1 0.13 add w3, w5, w7
+# CHECK-NEXT: 1 1 0.13 add wzr, w3, w5
+# CHECK-NEXT: 1 1 0.13 add w20, wzr, w4
+# CHECK-NEXT: 1 1 0.13 add w4, w6, wzr
+# CHECK-NEXT: 1 1 0.13 add w11, w13, w15
+# CHECK-NEXT: 1 2 0.50 add w9, w3, wzr, lsl #10
+# CHECK-NEXT: 1 2 0.50 add w17, w29, w20, lsl #31
+# CHECK-NEXT: 1 2 0.50 add w21, w22, w23, lsr #0
+# CHECK-NEXT: 1 2 0.50 add w24, w25, w26, lsr #18
+# CHECK-NEXT: 1 2 0.50 add w27, w28, w29, lsr #31
+# CHECK-NEXT: 1 2 0.50 add w2, w3, w4, asr #0
+# CHECK-NEXT: 1 2 0.50 add w5, w6, w7, asr #21
+# CHECK-NEXT: 1 2 0.50 add w8, w9, w10, asr #31
+# CHECK-NEXT: 1 1 0.13 add x3, x5, x7
+# CHECK-NEXT: 1 1 0.13 add xzr, x3, x5
+# CHECK-NEXT: 1 1 0.13 add x20, xzr, x4
+# CHECK-NEXT: 1 1 0.13 add x4, x6, xzr
+# CHECK-NEXT: 1 1 0.13 add x11, x13, x15
+# CHECK-NEXT: 1 2 0.50 add x9, x3, xzr, lsl #10
+# CHECK-NEXT: 1 2 0.50 add x17, x29, x20, lsl #63
+# CHECK-NEXT: 1 2 0.50 add x21, x22, x23, lsr #0
+# CHECK-NEXT: 1 2 0.50 add x24, x25, x26, lsr #18
+# CHECK-NEXT: 1 2 0.50 add x27, x28, x29, lsr #63
+# CHECK-NEXT: 1 2 0.50 add x2, x3, x4, asr #0
+# CHECK-NEXT: 1 2 0.50 add x5, x6, x7, asr #21
+# CHECK-NEXT: 1 2 0.50 add x8, x9, x10, asr #63
+# CHECK-NEXT: 1 1 0.25 adds w3, w5, w7
+# CHECK-NEXT: 1 1 0.25 cmn w3, w5
+# CHECK-NEXT: 1 1 0.25 adds w20, wzr, w4
+# CHECK-NEXT: 1 1 0.25 adds w4, w6, wzr
+# CHECK-NEXT: 1 1 0.25 adds w11, w13, w15
+# CHECK-NEXT: 1 2 0.50 adds w9, w3, wzr, lsl #10
+# CHECK-NEXT: 1 2 0.50 adds w17, w29, w20, lsl #31
+# CHECK-NEXT: 1 2 0.50 adds w21, w22, w23, lsr #0
+# CHECK-NEXT: 1 2 0.50 adds w24, w25, w26, lsr #18
+# CHECK-NEXT: 1 2 0.50 adds w27, w28, w29, lsr #31
+# CHECK-NEXT: 1 2 0.50 adds w2, w3, w4, asr #0
+# CHECK-NEXT: 1 2 0.50 adds w5, w6, w7, asr #21
+# CHECK-NEXT: 1 2 0.50 adds w8, w9, w10, asr #31
+# CHECK-NEXT: 1 1 0.25 adds x3, x5, x7
+# CHECK-NEXT: 1 1 0.25 cmn x3, x5
+# CHECK-NEXT: 1 1 0.25 adds x20, xzr, x4
+# CHECK-NEXT: 1 1 0.25 adds x4, x6, xzr
+# CHECK-NEXT: 1 1 0.25 adds x11, x13, x15
+# CHECK-NEXT: 1 2 0.50 adds x9, x3, xzr, lsl #10
+# CHECK-NEXT: 1 2 0.50 adds x17, x29, x20, lsl #63
+# CHECK-NEXT: 1 2 0.50 adds x21, x22, x23, lsr #0
+# CHECK-NEXT: 1 2 0.50 adds x24, x25, x26, lsr #18
+# CHECK-NEXT: 1 2 0.50 adds x27, x28, x29, lsr #63
+# CHECK-NEXT: 1 2 0.50 adds x2, x3, x4, asr #0
+# CHECK-NEXT: 1 2 0.50 adds x5, x6, x7, asr #21
+# CHECK-NEXT: 1 2 0.50 adds x8, x9, x10, asr #63
+# CHECK-NEXT: 1 1 0.13 sub w3, w5, w7
+# CHECK-NEXT: 1 1 0.13 sub wzr, w3, w5
+# CHECK-NEXT: 1 1 0.13 sub w4, w6, wzr
+# CHECK-NEXT: 1 1 0.13 sub w11, w13, w15
+# CHECK-NEXT: 1 2 0.50 sub w9, w3, wzr, lsl #10
+# CHECK-NEXT: 1 2 0.50 sub w17, w29, w20, lsl #31
+# CHECK-NEXT: 1 2 0.50 sub w21, w22, w23, lsr #0
+# CHECK-NEXT: 1 2 0.50 sub w24, w25, w26, lsr #18
+# CHECK-NEXT: 1 2 0.50 sub w27, w28, w29, lsr #31
+# CHECK-NEXT: 1 2 0.50 sub w2, w3, w4, asr #0
+# CHECK-NEXT: 1 2 0.50 sub w5, w6, w7, asr #21
+# CHECK-NEXT: 1 2 0.50 sub w8, w9, w10, asr #31
+# CHECK-NEXT: 1 1 0.13 sub x3, x5, x7
+# CHECK-NEXT: 1 1 0.13 sub xzr, x3, x5
+# CHECK-NEXT: 1 1 0.13 sub x4, x6, xzr
+# CHECK-NEXT: 1 1 0.13 sub x11, x13, x15
+# CHECK-NEXT: 1 2 0.50 sub x9, x3, xzr, lsl #10
+# CHECK-NEXT: 1 2 0.50 sub x17, x29, x20, lsl #63
+# CHECK-NEXT: 1 2 0.50 sub x21, x22, x23, lsr #0
+# CHECK-NEXT: 1 2 0.50 sub x24, x25, x26, lsr #18
+# CHECK-NEXT: 1 2 0.50 sub x27, x28, x29, lsr #63
+# CHECK-NEXT: 1 2 0.50 sub x2, x3, x4, asr #0
+# CHECK-NEXT: 1 2 0.50 sub x5, x6, x7, asr #21
+# CHECK-NEXT: 1 2 0.50 sub x8, x9, x10, asr #63
+# CHECK-NEXT: 1 1 0.25 subs w3, w5, w7
+# CHECK-NEXT: 1 1 0.25 cmp w3, w5
+# CHECK-NEXT: 1 1 0.25 subs w4, w6, wzr
+# CHECK-NEXT: 1 1 0.25 subs w11, w13, w15
+# CHECK-NEXT: 1 2 0.50 subs w9, w3, wzr, lsl #10
+# CHECK-NEXT: 1 2 0.50 subs w17, w29, w20, lsl #31
+# CHECK-NEXT: 1 2 0.50 subs w21, w22, w23, lsr #0
+# CHECK-NEXT: 1 2 0.50 subs w24, w25, w26, lsr #18
+# CHECK-NEXT: 1 2 0.50 subs w27, w28, w29, lsr #31
+# CHECK-NEXT: 1 2 0.50 subs w2, w3, w4, asr #0
+# CHECK-NEXT: 1 2 0.50 subs w5, w6, w7, asr #21
+# CHECK-NEXT: 1 2 0.50 subs w8, w9, w10, asr #31
+# CHECK-NEXT: 1 1 0.25 subs x3, x5, x7
+# CHECK-NEXT: 1 1 0.25 cmp x3, x5
+# CHECK-NEXT: 1 1 0.25 subs x4, x6, xzr
+# CHECK-NEXT: 1 1 0.25 subs x11, x13, x15
+# CHECK-NEXT: 1 2 0.50 subs x9, x3, xzr, lsl #10
+# CHECK-NEXT: 1 2 0.50 subs x17, x29, x20, lsl #63
+# CHECK-NEXT: 1 2 0.50 subs x21, x22, x23, lsr #0
+# CHECK-NEXT: 1 2 0.50 subs x24, x25, x26, lsr #18
+# CHECK-NEXT: 1 2 0.50 subs x27, x28, x29, lsr #63
+# CHECK-NEXT: 1 2 0.50 subs x2, x3, x4, asr #0
+# CHECK-NEXT: 1 2 0.50 subs x5, x6, x7, asr #21
+# CHECK-NEXT: 1 2 0.50 subs x8, x9, x10, asr #63
+# CHECK-NEXT: 1 1 0.25 cmn wzr, w4
+# CHECK-NEXT: 1 1 0.25 cmn w5, wzr
+# CHECK-NEXT: 1 1 0.25 cmn w6, w7
+# CHECK-NEXT: 1 2 0.50 cmn w8, w9, lsl #15
+# CHECK-NEXT: 1 2 0.50 cmn w10, w11, lsl #31
+# CHECK-NEXT: 1 2 0.50 cmn w12, w13, lsr #0
+# CHECK-NEXT: 1 2 0.50 cmn w14, w15, lsr #21
+# CHECK-NEXT: 1 2 0.50 cmn w16, w17, lsr #31
+# CHECK-NEXT: 1 2 0.50 cmn w18, w19, asr #0
+# CHECK-NEXT: 1 2 0.50 cmn w20, w21, asr #22
+# CHECK-NEXT: 1 2 0.50 cmn w22, w23, asr #31
+# CHECK-NEXT: 1 1 0.25 cmn x0, x3
+# CHECK-NEXT: 1 1 0.25 cmn xzr, x4
+# CHECK-NEXT: 1 1 0.25 cmn x5, xzr
+# CHECK-NEXT: 1 1 0.25 cmn x6, x7
+# CHECK-NEXT: 1 2 0.50 cmn x8, x9, lsl #15
+# CHECK-NEXT: 1 2 0.50 cmn x10, x11, lsl #63
+# CHECK-NEXT: 1 2 0.50 cmn x12, x13, lsr #0
+# CHECK-NEXT: 1 2 0.50 cmn x14, x15, lsr #41
+# CHECK-NEXT: 1 2 0.50 cmn x16, x17, lsr #63
+# CHECK-NEXT: 1 2 0.50 cmn x18, x19, asr #0
+# CHECK-NEXT: 1 2 0.50 cmn x20, x21, asr #55
+# CHECK-NEXT: 1 2 0.50 cmn x22, x23, asr #63
+# CHECK-NEXT: 1 1 0.25 cmp w0, w3
+# CHECK-NEXT: 1 1 0.25 cmp wzr, w4
+# CHECK-NEXT: 1 1 0.25 cmp w5, wzr
+# CHECK-NEXT: 1 1 0.25 cmp w6, w7
+# CHECK-NEXT: 1 2 0.50 cmp w8, w9, lsl #15
+# CHECK-NEXT: 1 2 0.50 cmp w10, w11, lsl #31
+# CHECK-NEXT: 1 2 0.50 cmp w12, w13, lsr #0
+# CHECK-NEXT: 1 2 0.50 cmp w14, w15, lsr #21
+# CHECK-NEXT: 1 2 0.50 cmp w18, w19, asr #0
+# CHECK-NEXT: 1 2 0.50 cmp w20, w21, asr #22
+# CHECK-NEXT: 1 2 0.50 cmp w22, w23, asr #31
+# CHECK-NEXT: 1 1 0.25 cmp x0, x3
+# CHECK-NEXT: 1 1 0.25 cmp xzr, x4
+# CHECK-NEXT: 1 1 0.25 cmp x5, xzr
+# CHECK-NEXT: 1 1 0.25 cmp x6, x7
+# CHECK-NEXT: 1 2 0.50 cmp x8, x9, lsl #15
+# CHECK-NEXT: 1 2 0.50 cmp x10, x11, lsl #63
+# CHECK-NEXT: 1 2 0.50 cmp x12, x13, lsr #0
+# CHECK-NEXT: 1 2 0.50 cmp x14, x15, lsr #41
+# CHECK-NEXT: 1 2 0.50 cmp x16, x17, lsr #63
+# CHECK-NEXT: 1 2 0.50 cmp x18, x19, asr #0
+# CHECK-NEXT: 1 2 0.50 cmp x20, x21, asr #55
+# CHECK-NEXT: 1 2 0.50 cmp x22, x23, asr #63
+# CHECK-NEXT: 1 1 0.25 cmp wzr, w0
+# CHECK-NEXT: 1 1 0.25 cmp xzr, x0
+# CHECK-NEXT: 1 1 0.13 adc w29, w27, w25
+# CHECK-NEXT: 1 1 0.13 adc wzr, w3, w4
+# CHECK-NEXT: 1 1 0.13 adc w9, wzr, w10
+# CHECK-NEXT: 1 1 0.13 adc w20, w0, wzr
+# CHECK-NEXT: 1 1 0.13 adc x29, x27, x25
+# CHECK-NEXT: 1 1 0.13 adc xzr, x3, x4
+# CHECK-NEXT: 1 1 0.13 adc x9, xzr, x10
+# CHECK-NEXT: 1 1 0.13 adc x20, x0, xzr
+# CHECK-NEXT: 1 1 0.25 adcs w29, w27, w25
+# CHECK-NEXT: 1 1 0.25 adcs wzr, w3, w4
+# CHECK-NEXT: 1 1 0.25 adcs w9, wzr, w10
+# CHECK-NEXT: 1 1 0.25 adcs w20, w0, wzr
+# CHECK-NEXT: 1 1 0.25 adcs x29, x27, x25
+# CHECK-NEXT: 1 1 0.25 adcs xzr, x3, x4
+# CHECK-NEXT: 1 1 0.25 adcs x9, xzr, x10
+# CHECK-NEXT: 1 1 0.25 adcs x20, x0, xzr
+# CHECK-NEXT: 1 1 0.13 sbc w29, w27, w25
+# CHECK-NEXT: 1 1 0.13 sbc wzr, w3, w4
+# CHECK-NEXT: 1 1 0.13 ngc w9, w10
+# CHECK-NEXT: 1 1 0.13 sbc w20, w0, wzr
+# CHECK-NEXT: 1 1 0.13 sbc x29, x27, x25
+# CHECK-NEXT: 1 1 0.13 sbc xzr, x3, x4
+# CHECK-NEXT: 1 1 0.13 ngc x9, x10
+# CHECK-NEXT: 1 1 0.13 sbc x20, x0, xzr
+# CHECK-NEXT: 1 1 0.25 sbcs w29, w27, w25
+# CHECK-NEXT: 1 1 0.25 sbcs wzr, w3, w4
+# CHECK-NEXT: 1 1 0.25 ngcs w9, w10
+# CHECK-NEXT: 1 1 0.25 sbcs w20, w0, wzr
+# CHECK-NEXT: 1 1 0.25 sbcs x29, x27, x25
+# CHECK-NEXT: 1 1 0.25 sbcs xzr, x3, x4
+# CHECK-NEXT: 1 1 0.25 ngcs x9, x10
+# CHECK-NEXT: 1 1 0.25 sbcs x20, x0, xzr
+# CHECK-NEXT: 1 1 0.13 ngc w3, w12
+# CHECK-NEXT: 1 1 0.13 ngc wzr, w9
+# CHECK-NEXT: 1 1 0.13 ngc w23, wzr
+# CHECK-NEXT: 1 1 0.13 ngc x29, x30
+# CHECK-NEXT: 1 1 0.13 ngc xzr, x0
+# CHECK-NEXT: 1 1 0.13 ngc x0, xzr
+# CHECK-NEXT: 1 1 0.25 ngcs w3, w12
+# CHECK-NEXT: 1 1 0.25 ngcs wzr, w9
+# CHECK-NEXT: 1 1 0.25 ngcs w23, wzr
+# CHECK-NEXT: 1 1 0.25 ngcs x29, x30
+# CHECK-NEXT: 1 1 0.25 ngcs xzr, x0
+# CHECK-NEXT: 1 1 0.25 ngcs x0, xzr
+# CHECK-NEXT: 1 1 0.13 sbfx x1, x2, #3, #2
+# CHECK-NEXT: 1 1 0.13 asr x3, x4, #63
+# CHECK-NEXT: 1 1 0.13 asr wzr, wzr, #31
+# CHECK-NEXT: 1 1 0.13 sbfx w12, w9, #0, #1
+# CHECK-NEXT: 1 1 0.13 ubfiz x4, x5, #52, #11
+# CHECK-NEXT: 1 1 0.13 ubfx xzr, x4, #0, #1
+# CHECK-NEXT: 1 1 0.13 ubfiz x4, xzr, #1, #6
+# CHECK-NEXT: 1 1 0.13 lsr x5, x6, #12
+# CHECK-NEXT: 1 2 0.50 bfi x4, x5, #52, #11
+# CHECK-NEXT: 1 2 0.50 bfxil xzr, x4, #0, #1
+# CHECK-NEXT: 1 2 0.50 bfc x4, #1, #6
+# CHECK-NEXT: 1 2 0.50 bfxil x5, x6, #12, #52
+# CHECK-NEXT: 1 1 0.13 sxtb w1, w2
+# CHECK-NEXT: 1 1 0.13 sxtb xzr, w3
+# CHECK-NEXT: 1 1 0.13 sxth w9, w10
+# CHECK-NEXT: 1 1 0.13 sxth x0, w1
+# CHECK-NEXT: 1 1 0.13 sxtw x3, w30
+# CHECK-NEXT: 1 1 0.13 uxtb w1, w2
+# CHECK-NEXT: 1 1 0.13 uxth w9, w10
+# CHECK-NEXT: 1 1 0.13 ubfx x3, x30, #0, #32
+# CHECK-NEXT: 1 1 0.13 asr w3, w2, #0
+# CHECK-NEXT: 1 1 0.13 asr w9, w10, #31
+# CHECK-NEXT: 1 1 0.13 asr x20, x21, #63
+# CHECK-NEXT: 1 1 0.13 asr w1, wzr, #3
+# CHECK-NEXT: 1 1 0.13 lsr w3, w2, #0
+# CHECK-NEXT: 1 1 0.13 lsr w9, w10, #31
+# CHECK-NEXT: 1 1 0.13 lsr x20, x21, #63
+# CHECK-NEXT: 1 1 0.13 lsr wzr, wzr, #3
+# CHECK-NEXT: 1 1 0.13 lsr w3, w2, #0
+# CHECK-NEXT: 1 1 0.13 lsl w9, w10, #31
+# CHECK-NEXT: 1 1 0.13 lsl x20, x21, #63
+# CHECK-NEXT: 1 1 0.13 lsl w1, wzr, #3
+# CHECK-NEXT: 1 1 0.13 sbfx w9, w10, #0, #1
+# CHECK-NEXT: 1 1 0.13 sbfiz x2, x3, #63, #1
+# CHECK-NEXT: 1 1 0.13 asr x19, x20, #0
+# CHECK-NEXT: 1 1 0.13 sbfiz x9, x10, #5, #59
+# CHECK-NEXT: 1 1 0.13 asr w9, w10, #0
+# CHECK-NEXT: 1 1 0.13 sbfiz w11, w12, #31, #1
+# CHECK-NEXT: 1 1 0.13 sbfiz w13, w14, #29, #3
+# CHECK-NEXT: 1 1 0.13 sbfiz xzr, xzr, #10, #11
+# CHECK-NEXT: 1 1 0.13 sbfx w9, w10, #0, #1
+# CHECK-NEXT: 1 1 0.13 asr x2, x3, #63
+# CHECK-NEXT: 1 1 0.13 asr x19, x20, #0
+# CHECK-NEXT: 1 1 0.13 asr x9, x10, #5
+# CHECK-NEXT: 1 1 0.13 asr w9, w10, #0
+# CHECK-NEXT: 1 1 0.13 asr w11, w12, #31
+# CHECK-NEXT: 1 1 0.13 asr w13, w14, #29
+# CHECK-NEXT: 1 1 0.13 sbfx xzr, xzr, #10, #11
+# CHECK-NEXT: 1 2 0.50 bfxil w9, w10, #0, #1
+# CHECK-NEXT: 1 2 0.50 bfi x2, x3, #63, #1
+# CHECK-NEXT: 1 2 0.50 bfxil x19, x20, #0, #64
+# CHECK-NEXT: 1 2 0.50 bfi x9, x10, #5, #59
+# CHECK-NEXT: 1 2 0.50 bfxil w9, w10, #0, #32
+# CHECK-NEXT: 1 2 0.50 bfi w11, w12, #31, #1
+# CHECK-NEXT: 1 2 0.50 bfi w13, w14, #29, #3
+# CHECK-NEXT: 1 2 0.50 bfc xzr, #10, #11
+# CHECK-NEXT: 1 2 0.50 bfxil w9, w10, #0, #1
+# CHECK-NEXT: 1 2 0.50 bfxil x2, x3, #63, #1
+# CHECK-NEXT: 1 2 0.50 bfxil x19, x20, #0, #64
+# CHECK-NEXT: 1 2 0.50 bfxil x9, x10, #5, #59
+# CHECK-NEXT: 1 2 0.50 bfxil w9, w10, #0, #32
+# CHECK-NEXT: 1 2 0.50 bfxil w11, w12, #31, #1
+# CHECK-NEXT: 1 2 0.50 bfxil w13, w14, #29, #3
+# CHECK-NEXT: 1 2 0.50 bfxil xzr, xzr, #10, #11
+# CHECK-NEXT: 1 1 0.13 ubfx w9, w10, #0, #1
+# CHECK-NEXT: 1 1 0.13 lsl x2, x3, #63
+# CHECK-NEXT: 1 1 0.13 lsr x19, x20, #0
+# CHECK-NEXT: 1 1 0.13 lsl x9, x10, #5
+# CHECK-NEXT: 1 1 0.13 lsr w9, w10, #0
+# CHECK-NEXT: 1 1 0.13 lsl w11, w12, #31
+# CHECK-NEXT: 1 1 0.13 lsl w13, w14, #29
+# CHECK-NEXT: 1 1 0.13 ubfiz xzr, xzr, #10, #11
+# CHECK-NEXT: 1 1 0.13 ubfx w9, w10, #0, #1
+# CHECK-NEXT: 1 1 0.13 lsr x2, x3, #63
+# CHECK-NEXT: 1 1 0.13 lsr x19, x20, #0
+# CHECK-NEXT: 1 1 0.13 lsr x9, x10, #5
+# CHECK-NEXT: 1 1 0.13 lsr w9, w10, #0
+# CHECK-NEXT: 1 1 0.13 lsr w11, w12, #31
+# CHECK-NEXT: 1 1 0.13 lsr w13, w14, #29
+# CHECK-NEXT: 1 1 0.13 ubfx xzr, xzr, #10, #11
+# CHECK-NEXT: 1 1 0.33 cbz w5, #4
+# CHECK-NEXT: 1 1 0.33 cbz x5, #0
+# CHECK-NEXT: 1 1 0.33 cbnz x2, #-4
+# CHECK-NEXT: 1 1 0.33 cbnz x26, #1048572
+# CHECK-NEXT: 1 1 0.33 cbz wzr, #0
+# CHECK-NEXT: 1 1 0.33 cbnz xzr, #0
+# CHECK-NEXT: 1 1 0.33 b.ne #4
+# CHECK-NEXT: 1 1 0.33 b.ge #1048572
+# CHECK-NEXT: 1 1 0.33 b.ge #-4
+# CHECK-NEXT: 1 1 0.25 ccmp w1, #31, #0, eq
+# CHECK-NEXT: 1 1 0.25 ccmp w3, #0, #15, hs
+# CHECK-NEXT: 1 1 0.25 ccmp wzr, #15, #13, hs
+# CHECK-NEXT: 1 1 0.25 ccmp x9, #31, #0, le
+# CHECK-NEXT: 1 1 0.25 ccmp x3, #0, #15, gt
+# CHECK-NEXT: 1 1 0.25 ccmp xzr, #5, #7, ne
+# CHECK-NEXT: 1 1 0.25 ccmn w1, #31, #0, eq
+# CHECK-NEXT: 1 1 0.25 ccmn w3, #0, #15, hs
+# CHECK-NEXT: 1 1 0.25 ccmn wzr, #15, #13, hs
+# CHECK-NEXT: 1 1 0.25 ccmn x9, #31, #0, le
+# CHECK-NEXT: 1 1 0.25 ccmn x3, #0, #15, gt
+# CHECK-NEXT: 1 1 0.25 ccmn xzr, #5, #7, ne
+# CHECK-NEXT: 1 1 0.25 ccmp w1, wzr, #0, eq
+# CHECK-NEXT: 1 1 0.25 ccmp w3, w0, #15, hs
+# CHECK-NEXT: 1 1 0.25 ccmp wzr, w15, #13, hs
+# CHECK-NEXT: 1 1 0.25 ccmp x9, xzr, #0, le
+# CHECK-NEXT: 1 1 0.25 ccmp x3, x0, #15, gt
+# CHECK-NEXT: 1 1 0.25 ccmp xzr, x5, #7, ne
+# CHECK-NEXT: 1 1 0.25 ccmn w1, wzr, #0, eq
+# CHECK-NEXT: 1 1 0.25 ccmn w3, w0, #15, hs
+# CHECK-NEXT: 1 1 0.25 ccmn wzr, w15, #13, hs
+# CHECK-NEXT: 1 1 0.25 ccmn x9, xzr, #0, le
+# CHECK-NEXT: 1 1 0.25 ccmn x3, x0, #15, gt
+# CHECK-NEXT: 1 1 0.25 ccmn xzr, x5, #7, ne
+# CHECK-NEXT: 1 1 0.13 csel w1, w0, w19, ne
+# CHECK-NEXT: 1 1 0.13 csel wzr, w5, w9, eq
+# CHECK-NEXT: 1 1 0.13 csel w9, wzr, w30, gt
+# CHECK-NEXT: 1 1 0.13 csel w1, w28, wzr, mi
+# CHECK-NEXT: 1 1 0.13 csel x19, x23, x29, lt
+# CHECK-NEXT: 1 1 0.13 csel xzr, x3, x4, ge
+# CHECK-NEXT: 1 1 0.13 csel x5, xzr, x6, hs
+# CHECK-NEXT: 1 1 0.13 csel x7, x8, xzr, lo
+# CHECK-NEXT: 1 1 0.13 csinc w1, w0, w19, ne
+# CHECK-NEXT: 1 1 0.13 csinc wzr, w5, w9, eq
+# CHECK-NEXT: 1 1 0.13 csinc w9, wzr, w30, gt
+# CHECK-NEXT: 1 1 0.13 csinc w1, w28, wzr, mi
+# CHECK-NEXT: 1 1 0.13 csinc x19, x23, x29, lt
+# CHECK-NEXT: 1 1 0.13 csinc xzr, x3, x4, ge
+# CHECK-NEXT: 1 1 0.13 csinc x5, xzr, x6, hs
+# CHECK-NEXT: 1 1 0.13 csinc x7, x8, xzr, lo
+# CHECK-NEXT: 1 1 0.13 csinv w1, w0, w19, ne
+# CHECK-NEXT: 1 1 0.13 csinv wzr, w5, w9, eq
+# CHECK-NEXT: 1 1 0.13 csinv w9, wzr, w30, gt
+# CHECK-NEXT: 1 1 0.13 csinv w1, w28, wzr, mi
+# CHECK-NEXT: 1 1 0.13 csinv x19, x23, x29, lt
+# CHECK-NEXT: 1 1 0.13 csinv xzr, x3, x4, ge
+# CHECK-NEXT: 1 1 0.13 csinv x5, xzr, x6, hs
+# CHECK-NEXT: 1 1 0.13 csinv x7, x8, xzr, lo
+# CHECK-NEXT: 1 1 0.13 csneg w1, w0, w19, ne
+# CHECK-NEXT: 1 1 0.13 csneg wzr, w5, w9, eq
+# CHECK-NEXT: 1 1 0.13 csneg w9, wzr, w30, gt
+# CHECK-NEXT: 1 1 0.13 csneg w1, w28, wzr, mi
+# CHECK-NEXT: 1 1 0.13 csneg x19, x23, x29, lt
+# CHECK-NEXT: 1 1 0.13 csneg xzr, x3, x4, ge
+# CHECK-NEXT: 1 1 0.13 csneg x5, xzr, x6, hs
+# CHECK-NEXT: 1 1 0.13 csneg x7, x8, xzr, lo
+# CHECK-NEXT: 1 1 0.13 cset w3, eq
+# CHECK-NEXT: 1 1 0.13 cset x9, pl
+# CHECK-NEXT: 1 1 0.13 csetm w20, ne
+# CHECK-NEXT: 1 1 0.13 csetm x30, ge
+# CHECK-NEXT: 1 1 0.13 csinc w2, wzr, wzr, al
+# CHECK-NEXT: 1 1 0.13 csinv x3, xzr, xzr, nv
+# CHECK-NEXT: 1 1 0.13 cinc w3, w5, gt
+# CHECK-NEXT: 1 1 0.13 cinc wzr, w4, le
+# CHECK-NEXT: 1 1 0.13 cset w9, lt
+# CHECK-NEXT: 1 1 0.13 cinc x3, x5, gt
+# CHECK-NEXT: 1 1 0.13 cinc xzr, x4, le
+# CHECK-NEXT: 1 1 0.13 cset x9, lt
+# CHECK-NEXT: 1 1 0.13 csinc w5, w6, w6, nv
+# CHECK-NEXT: 1 1 0.13 csinc x1, x2, x2, al
+# CHECK-NEXT: 1 1 0.13 cinv w3, w5, gt
+# CHECK-NEXT: 1 1 0.13 cinv wzr, w4, le
+# CHECK-NEXT: 1 1 0.13 csetm w9, lt
+# CHECK-NEXT: 1 1 0.13 cinv x3, x5, gt
+# CHECK-NEXT: 1 1 0.13 cinv xzr, x4, le
+# CHECK-NEXT: 1 1 0.13 csetm x9, lt
+# CHECK-NEXT: 1 1 0.13 csinv x1, x0, x0, al
+# CHECK-NEXT: 1 1 0.13 csinv w9, w8, w8, nv
+# CHECK-NEXT: 1 1 0.13 cneg w3, w5, gt
+# CHECK-NEXT: 1 1 0.13 cneg wzr, w4, le
+# CHECK-NEXT: 1 1 0.13 cneg w9, wzr, lt
+# CHECK-NEXT: 1 1 0.13 cneg x3, x5, gt
+# CHECK-NEXT: 1 1 0.13 cneg xzr, x4, le
+# CHECK-NEXT: 1 1 0.13 cneg x9, xzr, lt
+# CHECK-NEXT: 1 1 0.13 csneg x4, x8, x8, al
+# CHECK-NEXT: 1 1 0.13 csinv w9, w8, w8, nv
+# CHECK-NEXT: 1 1 0.13 rbit w0, w7
+# CHECK-NEXT: 1 1 0.13 rbit x18, x3
+# CHECK-NEXT: 1 1 0.13 rev16 w17, w1
+# CHECK-NEXT: 1 1 0.13 rev16 x5, x2
+# CHECK-NEXT: 1 1 0.13 rev w18, w0
+# CHECK-NEXT: 1 1 0.13 rev32 x20, x1
+# CHECK-NEXT: 1 1 0.13 rev x22, x2
+# CHECK-NEXT: 1 1 0.13 clz w24, w3
+# CHECK-NEXT: 1 1 0.13 clz x26, x4
+# CHECK-NEXT: 1 1 0.13 cls w3, w5
+# CHECK-NEXT: 1 1 0.13 cls x20, x5
+# CHECK-NEXT: 1 12 12.00 udiv w0, w7, w10
+# CHECK-NEXT: 1 20 20.00 udiv x9, x22, x4
+# CHECK-NEXT: 1 12 12.00 sdiv w12, w21, w0
+# CHECK-NEXT: 1 20 20.00 sdiv x13, x2, x1
+# CHECK-NEXT: 1 1 0.13 lsl w11, w12, w13
+# CHECK-NEXT: 1 1 0.13 lsl x14, x15, x16
+# CHECK-NEXT: 1 1 0.13 lsr w17, w18, w19
+# CHECK-NEXT: 1 1 0.13 lsr x20, x21, x22
+# CHECK-NEXT: 1 1 0.13 asr w23, w24, w25
+# CHECK-NEXT: 1 1 0.13 asr x26, x27, x28
+# CHECK-NEXT: 1 1 0.13 ror w0, w1, w2
+# CHECK-NEXT: 1 1 0.13 ror x3, x4, x5
+# CHECK-NEXT: 1 1 0.13 lsl w6, w7, w8
+# CHECK-NEXT: 1 1 0.13 lsl x9, x10, x11
+# CHECK-NEXT: 1 1 0.13 lsr w12, w13, w14
+# CHECK-NEXT: 1 1 0.13 lsr x15, x16, x17
+# CHECK-NEXT: 1 1 0.13 asr w18, w19, w20
+# CHECK-NEXT: 1 1 0.13 asr x21, x22, x23
+# CHECK-NEXT: 1 1 0.13 ror w24, w25, w26
+# CHECK-NEXT: 1 1 0.13 ror x27, x28, x29
+# CHECK-NEXT: 1 3 0.50 smulh x30, x29, x28
+# CHECK-NEXT: 1 3 0.50 smulh xzr, x27, x26
+# CHECK-NEXT: 1 3 0.50 umulh x30, x29, x28
+# CHECK-NEXT: 1 3 0.50 umulh x23, x30, xzr
+# CHECK-NEXT: 1 2 0.50 madd w1, w3, w7, w4
+# CHECK-NEXT: 1 2 0.50 madd wzr, w0, w9, w11
+# CHECK-NEXT: 1 2 0.50 madd w13, wzr, w4, w4
+# CHECK-NEXT: 1 2 0.50 madd w19, w30, wzr, w29
+# CHECK-NEXT: 1 2 0.50 mul w4, w5, w6
+# CHECK-NEXT: 1 2 0.50 madd x1, x3, x7, x4
+# CHECK-NEXT: 1 2 0.50 madd xzr, x0, x9, x11
+# CHECK-NEXT: 1 2 0.50 madd x13, xzr, x4, x4
+# CHECK-NEXT: 1 2 0.50 madd x19, x30, xzr, x29
+# CHECK-NEXT: 1 2 0.50 mul x4, x5, x6
+# CHECK-NEXT: 1 2 0.50 msub w1, w3, w7, w4
+# CHECK-NEXT: 1 2 0.50 msub wzr, w0, w9, w11
+# CHECK-NEXT: 1 2 0.50 msub w13, wzr, w4, w4
+# CHECK-NEXT: 1 2 0.50 msub w19, w30, wzr, w29
+# CHECK-NEXT: 1 2 0.50 mneg w4, w5, w6
+# CHECK-NEXT: 1 2 0.50 msub x1, x3, x7, x4
+# CHECK-NEXT: 1 2 0.50 msub xzr, x0, x9, x11
+# CHECK-NEXT: 1 2 0.50 msub x13, xzr, x4, x4
+# CHECK-NEXT: 1 2 0.50 msub x19, x30, xzr, x29
+# CHECK-NEXT: 1 2 0.50 mneg x4, x5, x6
+# CHECK-NEXT: 1 2 0.50 smaddl x3, w5, w2, x9
+# CHECK-NEXT: 1 2 0.50 smaddl xzr, w10, w11, x12
+# CHECK-NEXT: 1 2 0.50 smaddl x13, wzr, w14, x15
+# CHECK-NEXT: 1 2 0.50 smaddl x16, w17, wzr, x18
+# CHECK-NEXT: 1 2 0.50 smull x19, w20, w21
+# CHECK-NEXT: 1 2 0.50 smsubl x3, w5, w2, x9
+# CHECK-NEXT: 1 2 0.50 smsubl xzr, w10, w11, x12
+# CHECK-NEXT: 1 2 0.50 smsubl x13, wzr, w14, x15
+# CHECK-NEXT: 1 2 0.50 smsubl x16, w17, wzr, x18
+# CHECK-NEXT: 1 2 0.50 smnegl x19, w20, w21
+# CHECK-NEXT: 1 2 0.50 umaddl x3, w5, w2, x9
+# CHECK-NEXT: 1 2 0.50 umaddl xzr, w10, w11, x12
+# CHECK-NEXT: 1 2 0.50 umaddl x13, wzr, w14, x15
+# CHECK-NEXT: 1 2 0.50 umaddl x16, w17, wzr, x18
+# CHECK-NEXT: 1 2 0.50 umull x19, w20, w21
+# CHECK-NEXT: 1 2 0.50 umsubl x3, w5, w2, x9
+# CHECK-NEXT: 1 2 0.50 umsubl x16, w17, wzr, x18
+# CHECK-NEXT: 1 2 0.50 umnegl x19, w20, w21
+# CHECK-NEXT: 1 3 0.50 smulh x30, x29, x28
+# CHECK-NEXT: 1 3 0.50 smulh x23, x22, xzr
+# CHECK-NEXT: 1 3 0.50 umulh x23, x22, xzr
+# CHECK-NEXT: 1 2 0.50 mul x19, x20, xzr
+# CHECK-NEXT: 1 2 0.50 mneg w21, w22, w23
+# CHECK-NEXT: 1 2 0.50 smull x11, w13, w17
+# CHECK-NEXT: 1 2 0.50 umull x11, w13, w17
+# CHECK-NEXT: 1 2 0.50 smnegl x11, w13, w17
+# CHECK-NEXT: 1 2 0.50 umnegl x11, w13, w17
+# CHECK-NEXT: 2 3 0.50 extr w3, w5, w7, #0
+# CHECK-NEXT: 2 3 0.50 extr w11, w13, w17, #31
+# CHECK-NEXT: 2 3 0.50 extr x3, x5, x7, #15
+# CHECK-NEXT: 2 3 0.50 extr x11, x13, x17, #63
+# CHECK-NEXT: 1 1 0.13 ror x19, x23, #24
+# CHECK-NEXT: 1 1 0.13 ror x29, xzr, #63
+# CHECK-NEXT: 1 1 0.13 ror w9, w13, #31
+# CHECK-NEXT: 1 2 1.00 fcmp s3, s5
+# CHECK-NEXT: 1 2 1.00 fcmp s31, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmp s31, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmpe s29, s30
+# CHECK-NEXT: 1 2 1.00 fcmpe s15, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmpe s15, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmp d4, d12
+# CHECK-NEXT: 1 2 1.00 fcmp d23, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmp d23, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmpe d26, d22
+# CHECK-NEXT: 1 2 1.00 fcmpe d29, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmpe d29, #0.0
+# CHECK-NEXT: 1 2 1.00 fccmp s1, s31, #0, eq
+# CHECK-NEXT: 1 2 1.00 fccmp s3, s0, #15, hs
+# CHECK-NEXT: 1 2 1.00 fccmp s31, s15, #13, hs
+# CHECK-NEXT: 1 2 1.00 fccmp d9, d31, #0, le
+# CHECK-NEXT: 1 2 1.00 fccmp d3, d0, #15, gt
+# CHECK-NEXT: 1 2 1.00 fccmp d31, d5, #7, ne
+# CHECK-NEXT: 1 2 1.00 fccmpe s1, s31, #0, eq
+# CHECK-NEXT: 1 2 1.00 fccmpe s3, s0, #15, hs
+# CHECK-NEXT: 1 2 1.00 fccmpe s31, s15, #13, hs
+# CHECK-NEXT: 1 2 1.00 fccmpe d9, d31, #0, le
+# CHECK-NEXT: 1 2 1.00 fccmpe d3, d0, #15, gt
+# CHECK-NEXT: 1 2 1.00 fccmpe d31, d5, #7, ne
+# CHECK-NEXT: 1 2 0.25 fcsel s3, s20, s9, pl
+# CHECK-NEXT: 1 2 0.25 fcsel d9, d10, d11, mi
+# CHECK-NEXT: 1 2 0.25 fmov s0, s1
+# CHECK-NEXT: 1 2 0.25 fabs s2, s3
+# CHECK-NEXT: 1 2 0.25 fneg s4, s5
+# CHECK-NEXT: 1 8 1.00 fsqrt s6, s7
+# CHECK-NEXT: 1 3 0.50 fcvt d8, s9
+# CHECK-NEXT: 1 3 0.50 fcvt h10, s11
+# CHECK-NEXT: 1 3 0.50 frintn s12, s13
+# CHECK-NEXT: 1 3 0.50 frintp s14, s15
+# CHECK-NEXT: 1 3 0.50 frintm s16, s17
+# CHECK-NEXT: 1 3 0.50 frintz s18, s19
+# CHECK-NEXT: 1 3 0.50 frinta s20, s21
+# CHECK-NEXT: 1 3 0.50 frintx s22, s23
+# CHECK-NEXT: 1 3 0.50 frinti s24, s25
+# CHECK-NEXT: 1 2 0.25 fmov d0, d1
+# CHECK-NEXT: 1 2 0.25 fabs d2, d3
+# CHECK-NEXT: 1 2 0.25 fneg d4, d5
+# CHECK-NEXT: 1 13 1.00 fsqrt d6, d7
+# CHECK-NEXT: 1 3 0.50 fcvt s8, d9
+# CHECK-NEXT: 1 3 0.50 fcvt h10, d11
+# CHECK-NEXT: 1 3 0.50 frintn d12, d13
+# CHECK-NEXT: 1 3 0.50 frintp d14, d15
+# CHECK-NEXT: 1 3 0.50 frintm d16, d17
+# CHECK-NEXT: 1 3 0.50 frintz d18, d19
+# CHECK-NEXT: 1 3 0.50 frinta d20, d21
+# CHECK-NEXT: 1 3 0.50 frintx d22, d23
+# CHECK-NEXT: 1 3 0.50 frinti d24, d25
+# CHECK-NEXT: 1 3 0.50 fcvt s26, h27
+# CHECK-NEXT: 1 3 0.50 fcvt d28, h29
+# CHECK-NEXT: 1 3 0.25 fmul s20, s19, s17
+# CHECK-NEXT: 1 8 1.00 fdiv s1, s2, s3
+# CHECK-NEXT: 1 2 0.25 fadd s4, s5, s6
+# CHECK-NEXT: 1 2 0.25 fsub s7, s8, s9
+# CHECK-NEXT: 1 2 0.25 fmax s10, s11, s12
+# CHECK-NEXT: 1 2 0.25 fmin s13, s14, s15
+# CHECK-NEXT: 1 2 0.25 fmaxnm s16, s17, s18
+# CHECK-NEXT: 1 2 0.25 fminnm s19, s20, s21
+# CHECK-NEXT: 1 3 0.25 fnmul s22, s23, s2
+# CHECK-NEXT: 1 3 0.25 fmul d20, d19, d17
+# CHECK-NEXT: 1 13 1.00 fdiv d1, d2, d3
+# CHECK-NEXT: 1 2 0.25 fadd d4, d5, d6
+# CHECK-NEXT: 1 2 0.25 fsub d7, d8, d9
+# CHECK-NEXT: 1 2 0.25 fmax d10, d11, d12
+# CHECK-NEXT: 1 2 0.25 fmin d13, d14, d15
+# CHECK-NEXT: 1 2 0.25 fmaxnm d16, d17, d18
+# CHECK-NEXT: 1 2 0.25 fminnm d19, d20, d21
+# CHECK-NEXT: 1 3 0.25 fnmul d22, d23, d24
+# CHECK-NEXT: 1 4 0.25 fmadd s3, s5, s6, s31
+# CHECK-NEXT: 1 4 0.25 fmadd d3, d13, d0, d23
+# CHECK-NEXT: 1 4 0.25 fmsub s3, s5, s6, s31
+# CHECK-NEXT: 1 4 0.25 fmsub d3, d13, d0, d23
+# CHECK-NEXT: 1 4 0.25 fnmadd s3, s5, s6, s31
+# CHECK-NEXT: 1 4 0.25 fnmadd d3, d13, d0, d23
+# CHECK-NEXT: 1 4 0.25 fnmsub s3, s5, s6, s31
+# CHECK-NEXT: 1 4 0.25 fnmsub d3, d13, d0, d23
+# CHECK-NEXT: 1 3 1.00 fcvtzs w3, h5, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzs wzr, h20, #13
+# CHECK-NEXT: 1 3 1.00 fcvtzs w19, h0, #32
+# CHECK-NEXT: 1 3 1.00 fcvtzs x3, h5, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzs x12, h30, #45
+# CHECK-NEXT: 1 3 1.00 fcvtzs x19, h0, #64
+# CHECK-NEXT: 1 3 1.00 fcvtzs w3, s5, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzs wzr, s20, #13
+# CHECK-NEXT: 1 3 1.00 fcvtzs w19, s0, #32
+# CHECK-NEXT: 1 3 1.00 fcvtzs x3, s5, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzs x12, s30, #45
+# CHECK-NEXT: 1 3 1.00 fcvtzs x19, s0, #64
+# CHECK-NEXT: 1 3 1.00 fcvtzs w3, d5, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzs wzr, d20, #13
+# CHECK-NEXT: 1 3 1.00 fcvtzs w19, d0, #32
+# CHECK-NEXT: 1 3 1.00 fcvtzs x3, d5, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzs x12, d30, #45
+# CHECK-NEXT: 1 3 1.00 fcvtzs x19, d0, #64
+# CHECK-NEXT: 1 3 1.00 fcvtzu w3, h5, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzu wzr, h20, #13
+# CHECK-NEXT: 1 3 1.00 fcvtzu w19, h0, #32
+# CHECK-NEXT: 1 3 1.00 fcvtzu x3, h5, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzu x12, h30, #45
+# CHECK-NEXT: 1 3 1.00 fcvtzu x19, h0, #64
+# CHECK-NEXT: 1 3 1.00 fcvtzu w3, s5, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzu wzr, s20, #13
+# CHECK-NEXT: 1 3 1.00 fcvtzu w19, s0, #32
+# CHECK-NEXT: 1 3 1.00 fcvtzu x3, s5, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzu x12, s30, #45
+# CHECK-NEXT: 1 3 1.00 fcvtzu x19, s0, #64
+# CHECK-NEXT: 1 3 1.00 fcvtzu w3, d5, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzu wzr, d20, #13
+# CHECK-NEXT: 1 3 1.00 fcvtzu w19, d0, #32
+# CHECK-NEXT: 1 3 1.00 fcvtzu x3, d5, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzu x12, d30, #45
+# CHECK-NEXT: 1 3 1.00 fcvtzu x19, d0, #64
+# CHECK-NEXT: 1 3 1.00 scvtf h23, w19, #1
+# CHECK-NEXT: 1 3 1.00 scvtf h31, wzr, #20
+# CHECK-NEXT: 1 3 1.00 scvtf h14, w0, #32
+# CHECK-NEXT: 1 3 1.00 scvtf h23, x19, #1
+# CHECK-NEXT: 1 3 1.00 scvtf h31, xzr, #20
+# CHECK-NEXT: 1 3 1.00 scvtf h14, x0, #64
+# CHECK-NEXT: 1 3 1.00 scvtf s23, w19, #1
+# CHECK-NEXT: 1 3 1.00 scvtf s31, wzr, #20
+# CHECK-NEXT: 1 3 1.00 scvtf s14, w0, #32
+# CHECK-NEXT: 1 3 1.00 scvtf s23, x19, #1
+# CHECK-NEXT: 1 3 1.00 scvtf s31, xzr, #20
+# CHECK-NEXT: 1 3 1.00 scvtf s14, x0, #64
+# CHECK-NEXT: 1 3 1.00 scvtf d23, w19, #1
+# CHECK-NEXT: 1 3 1.00 scvtf d31, wzr, #20
+# CHECK-NEXT: 1 3 1.00 scvtf d14, w0, #32
+# CHECK-NEXT: 1 3 1.00 scvtf d23, x19, #1
+# CHECK-NEXT: 1 3 1.00 scvtf d31, xzr, #20
+# CHECK-NEXT: 1 3 1.00 scvtf d14, x0, #64
+# CHECK-NEXT: 1 3 1.00 ucvtf h23, w19, #1
+# CHECK-NEXT: 1 3 1.00 ucvtf h31, wzr, #20
+# CHECK-NEXT: 1 3 1.00 ucvtf h14, w0, #32
+# CHECK-NEXT: 1 3 1.00 ucvtf h23, x19, #1
+# CHECK-NEXT: 1 3 1.00 ucvtf h31, xzr, #20
+# CHECK-NEXT: 1 3 1.00 ucvtf h14, x0, #64
+# CHECK-NEXT: 1 3 1.00 ucvtf s23, w19, #1
+# CHECK-NEXT: 1 3 1.00 ucvtf s31, wzr, #20
+# CHECK-NEXT: 1 3 1.00 ucvtf s14, w0, #32
+# CHECK-NEXT: 1 3 1.00 ucvtf s23, x19, #1
+# CHECK-NEXT: 1 3 1.00 ucvtf s31, xzr, #20
+# CHECK-NEXT: 1 3 1.00 ucvtf s14, x0, #64
+# CHECK-NEXT: 1 3 1.00 ucvtf d23, w19, #1
+# CHECK-NEXT: 1 3 1.00 ucvtf d31, wzr, #20
+# CHECK-NEXT: 1 3 1.00 ucvtf d14, w0, #32
+# CHECK-NEXT: 1 3 1.00 ucvtf d23, x19, #1
+# CHECK-NEXT: 1 3 1.00 ucvtf d31, xzr, #20
+# CHECK-NEXT: 1 3 1.00 ucvtf d14, x0, #64
+# CHECK-NEXT: 1 3 1.00 fcvtns w3, h31
+# CHECK-NEXT: 1 3 1.00 fcvtns xzr, h12
+# CHECK-NEXT: 1 3 1.00 fcvtnu wzr, h12
+# CHECK-NEXT: 1 3 1.00 fcvtnu x0, h0
+# CHECK-NEXT: 1 3 1.00 fcvtps wzr, h9
+# CHECK-NEXT: 1 3 1.00 fcvtps x12, h20
+# CHECK-NEXT: 1 3 1.00 fcvtpu w30, h23
+# CHECK-NEXT: 1 3 1.00 fcvtpu x29, h3
+# CHECK-NEXT: 1 3 1.00 fcvtms w2, h3
+# CHECK-NEXT: 1 3 1.00 fcvtms x4, h5
+# CHECK-NEXT: 1 3 1.00 fcvtmu w6, h7
+# CHECK-NEXT: 1 3 1.00 fcvtmu x8, h9
+# CHECK-NEXT: 1 3 1.00 fcvtzs w10, h11
+# CHECK-NEXT: 1 3 1.00 fcvtzs x12, h13
+# CHECK-NEXT: 1 3 1.00 fcvtzu w14, h15
+# CHECK-NEXT: 1 3 1.00 fcvtzu x15, h16
+# CHECK-NEXT: 1 3 1.00 scvtf h17, w18
+# CHECK-NEXT: 1 3 1.00 scvtf h19, x20
+# CHECK-NEXT: 1 3 1.00 ucvtf h21, w22
+# CHECK-NEXT: 1 3 1.00 scvtf h23, x24
+# CHECK-NEXT: 1 3 1.00 fcvtas w25, h26
+# CHECK-NEXT: 1 3 1.00 fcvtas x27, h28
+# CHECK-NEXT: 1 3 1.00 fcvtau w29, h30
+# CHECK-NEXT: 1 3 1.00 fcvtau xzr, h0
+# CHECK-NEXT: 1 3 1.00 fcvtns w3, s31
+# CHECK-NEXT: 1 3 1.00 fcvtns xzr, s12
+# CHECK-NEXT: 1 3 1.00 fcvtnu wzr, s12
+# CHECK-NEXT: 1 3 1.00 fcvtnu x0, s0
+# CHECK-NEXT: 1 3 1.00 fcvtps wzr, s9
+# CHECK-NEXT: 1 3 1.00 fcvtps x12, s20
+# CHECK-NEXT: 1 3 1.00 fcvtpu w30, s23
+# CHECK-NEXT: 1 3 1.00 fcvtpu x29, s3
+# CHECK-NEXT: 1 3 1.00 fcvtms w2, s3
+# CHECK-NEXT: 1 3 1.00 fcvtms x4, s5
+# CHECK-NEXT: 1 3 1.00 fcvtmu w6, s7
+# CHECK-NEXT: 1 3 1.00 fcvtmu x8, s9
+# CHECK-NEXT: 1 3 1.00 fcvtzs w10, s11
+# CHECK-NEXT: 1 3 1.00 fcvtzs x12, s13
+# CHECK-NEXT: 1 3 1.00 fcvtzu w14, s15
+# CHECK-NEXT: 1 3 1.00 fcvtzu x15, s16
+# CHECK-NEXT: 1 3 1.00 scvtf s17, w18
+# CHECK-NEXT: 1 3 1.00 scvtf s19, x20
+# CHECK-NEXT: 1 3 1.00 ucvtf s21, w22
+# CHECK-NEXT: 1 3 1.00 scvtf s23, x24
+# CHECK-NEXT: 1 3 1.00 fcvtas w25, s26
+# CHECK-NEXT: 1 3 1.00 fcvtas x27, s28
+# CHECK-NEXT: 1 3 1.00 fcvtau w29, s30
+# CHECK-NEXT: 1 3 1.00 fcvtau xzr, s0
+# CHECK-NEXT: 1 3 1.00 fcvtns w3, d31
+# CHECK-NEXT: 1 3 1.00 fcvtns xzr, d12
+# CHECK-NEXT: 1 3 1.00 fcvtnu wzr, d12
+# CHECK-NEXT: 1 3 1.00 fcvtnu x0, d0
+# CHECK-NEXT: 1 3 1.00 fcvtps wzr, d9
+# CHECK-NEXT: 1 3 1.00 fcvtps x12, d20
+# CHECK-NEXT: 1 3 1.00 fcvtpu w30, d23
+# CHECK-NEXT: 1 3 1.00 fcvtpu x29, d3
+# CHECK-NEXT: 1 3 1.00 fcvtms w2, d3
+# CHECK-NEXT: 1 3 1.00 fcvtms x4, d5
+# CHECK-NEXT: 1 3 1.00 fcvtmu w6, d7
+# CHECK-NEXT: 1 3 1.00 fcvtmu x8, d9
+# CHECK-NEXT: 1 3 1.00 fcvtzs w10, d11
+# CHECK-NEXT: 1 3 1.00 fcvtzs x12, d13
+# CHECK-NEXT: 1 3 1.00 fcvtzu w14, d15
+# CHECK-NEXT: 1 3 1.00 fcvtzu x15, d16
+# CHECK-NEXT: 1 3 1.00 scvtf d17, w18
+# CHECK-NEXT: 1 3 1.00 scvtf d19, x20
+# CHECK-NEXT: 1 3 1.00 ucvtf d21, w22
+# CHECK-NEXT: 1 3 1.00 ucvtf d23, x24
+# CHECK-NEXT: 1 3 1.00 fcvtas w25, d26
+# CHECK-NEXT: 1 3 1.00 fcvtas x27, d28
+# CHECK-NEXT: 1 3 1.00 fcvtau w29, d30
+# CHECK-NEXT: 1 3 1.00 fcvtau xzr, d0
+# CHECK-NEXT: 2 2 1.00 fmov w3, s9
+# CHECK-NEXT: 1 3 1.00 fmov s9, w3
+# CHECK-NEXT: 2 2 1.00 fmov x20, d31
+# CHECK-NEXT: 1 3 1.00 fmov d1, x15
+# CHECK-NEXT: 2 2 1.00 fmov x3, v12.d[1]
+# CHECK-NEXT: 2 5 1.00 fmov v1.d[1], x19
+# CHECK-NEXT: 1 2 0.25 fmov s2, #0.12500000
+# CHECK-NEXT: 1 2 0.25 fmov s3, #1.00000000
+# CHECK-NEXT: 1 2 0.25 fmov d30, #16.00000000
+# CHECK-NEXT: 1 2 0.25 fmov s4, #1.06250000
+# CHECK-NEXT: 1 2 0.25 fmov d10, #1.93750000
+# CHECK-NEXT: 1 2 0.25 fmov s12, #-1.00000000
+# CHECK-NEXT: 1 2 0.25 fmov d16, #8.50000000
+# CHECK-NEXT: 2 5 0.33 * ldr w3, #0
+# CHECK-NEXT: 2 5 0.33 * ldr x29, #4
+# CHECK-NEXT: 2 5 0.33 * ldrsw xzr, #-4
+# CHECK-NEXT: 2 7 0.33 * ldr s0, #8
+# CHECK-NEXT: 2 7 0.33 * ldr d0, #1048572
+# CHECK-NEXT: 2 7 0.33 * ldr q0, #-1048576
+# CHECK-NEXT: 2 5 0.33 U prfm pldl1strm, #0
+# CHECK-NEXT: 2 5 0.33 U prfm #22, #0
+# CHECK-NEXT: 3 5 0.50 * * U stxrb w18, w8, [sp]
+# CHECK-NEXT: 3 5 0.50 * * U stxrh w24, w15, [x16]
+# CHECK-NEXT: 3 5 0.50 * * U stxr w5, w6, [x17]
+# CHECK-NEXT: 3 5 0.50 * * U stxr w1, x10, [x21]
+# CHECK-NEXT: 1 4 0.33 * * U ldxrb w30, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U ldxrh w17, [x4]
+# CHECK-NEXT: 1 4 0.33 * * U ldxr w22, [sp]
+# CHECK-NEXT: 1 4 0.33 * * U ldxr x11, [x29]
+# CHECK-NEXT: 1 4 0.33 * * U ldxr x11, [x29]
+# CHECK-NEXT: 1 4 0.33 * * U ldxr x11, [x29]
+# CHECK-NEXT: 3 5 0.50 * * U stxp w12, w11, w10, [sp]
+# CHECK-NEXT: 3 5 0.50 * * U stxp wzr, x27, x9, [x12]
+# CHECK-NEXT: 2 4 0.33 * * U ldxp w0, wzr, [sp]
+# CHECK-NEXT: 2 4 0.33 * * U ldxp x17, x0, [x18]
+# CHECK-NEXT: 2 4 0.33 * * U ldxp x17, x0, [x18]
+# CHECK-NEXT: 3 5 0.50 * * U stlxrb w12, w22, [x0]
+# CHECK-NEXT: 3 5 0.50 * * U stlxrh w10, w1, [x1]
+# CHECK-NEXT: 3 5 0.50 * * U stlxr w9, w2, [x2]
+# CHECK-NEXT: 3 5 0.50 * * U stlxr w9, x3, [sp]
+# CHECK-NEXT: 1 4 0.33 * * U ldaxrb w8, [x4]
+# CHECK-NEXT: 1 4 0.33 * * U ldaxrh w7, [x5]
+# CHECK-NEXT: 1 4 0.33 * * U ldaxr w6, [sp]
+# CHECK-NEXT: 1 4 0.33 * * U ldaxr x5, [x6]
+# CHECK-NEXT: 1 4 0.33 * * U ldaxr x5, [x6]
+# CHECK-NEXT: 1 4 0.33 * * U ldaxr x5, [x6]
+# CHECK-NEXT: 3 5 0.50 * * U stlxp w4, w5, w6, [sp]
+# CHECK-NEXT: 3 5 0.50 * * U stlxp wzr, x6, x7, [x1]
+# CHECK-NEXT: 2 4 0.33 * * U ldaxp w5, w18, [sp]
+# CHECK-NEXT: 2 4 0.33 * * U ldaxp x6, x19, [x22]
+# CHECK-NEXT: 2 4 0.33 * * U ldaxp x6, x19, [x22]
+# CHECK-NEXT: 2 1 0.50 * U stlrb w24, [sp]
+# CHECK-NEXT: 2 1 0.50 * U stlrh w25, [x30]
+# CHECK-NEXT: 2 1 0.50 * U stlr w26, [x29]
+# CHECK-NEXT: 2 1 0.50 * U stlr x27, [x28]
+# CHECK-NEXT: 2 1 0.50 * U stlr x27, [x28]
+# CHECK-NEXT: 2 1 0.50 * U stlr x27, [x28]
+# CHECK-NEXT: 1 4 0.33 * U ldarb w23, [sp]
+# CHECK-NEXT: 1 4 0.33 * U ldarh w22, [x30]
+# CHECK-NEXT: 1 4 0.33 * U ldar wzr, [x29]
+# CHECK-NEXT: 1 4 0.33 * U ldar x21, [x28]
+# CHECK-NEXT: 1 4 0.33 * U ldar x21, [x28]
+# CHECK-NEXT: 1 4 0.33 * U ldar x21, [x28]
+# CHECK-NEXT: 2 1 0.50 * sturb w9, [sp]
+# CHECK-NEXT: 2 1 0.50 * sturh wzr, [x12, #255]
+# CHECK-NEXT: 2 1 0.50 * stur w16, [x0, #-256]
+# CHECK-NEXT: 2 1 0.50 * stur x28, [x14, #1]
+# CHECK-NEXT: 1 4 0.33 * ldurb w1, [x20, #255]
+# CHECK-NEXT: 1 4 0.33 * ldurh w20, [x1, #255]
+# CHECK-NEXT: 1 4 0.33 * ldur w12, [sp, #255]
+# CHECK-NEXT: 1 4 0.33 * ldur xzr, [x12, #255]
+# CHECK-NEXT: 1 4 0.33 * ldursb x9, [x7, #-256]
+# CHECK-NEXT: 1 4 0.33 * ldursh x17, [x19, #-256]
+# CHECK-NEXT: 1 4 0.33 * ldursw x20, [x15, #-256]
+# CHECK-NEXT: 1 4 0.33 U prfum pldl2keep, [sp, #-256]
+# CHECK-NEXT: 1 4 0.33 * ldursb w19, [x1, #-256]
+# CHECK-NEXT: 1 4 0.33 * ldursh w15, [x21, #-256]
+# CHECK-NEXT: 2 2 0.50 * stur b0, [sp, #1]
+# CHECK-NEXT: 2 2 0.50 * stur h12, [x12, #-1]
+# CHECK-NEXT: 2 2 0.50 * stur s15, [x0, #255]
+# CHECK-NEXT: 2 2 0.50 * stur d31, [x5, #25]
+# CHECK-NEXT: 2 2 0.50 * stur q9, [x5]
+# CHECK-NEXT: 1 6 0.33 * ldur b3, [sp]
+# CHECK-NEXT: 1 6 0.33 * ldur h5, [x4, #-256]
+# CHECK-NEXT: 1 6 0.33 * ldur s7, [x12, #-1]
+# CHECK-NEXT: 1 6 0.33 * ldur d11, [x19, #4]
+# CHECK-NEXT: 1 6 0.33 * ldur q13, [x1, #2]
+# CHECK-NEXT: 3 1 0.50 * strb w9, [x2], #255
+# CHECK-NEXT: 3 1 0.50 * strb w10, [x3], #1
+# CHECK-NEXT: 3 1 0.50 * strb w10, [x3], #-256
+# CHECK-NEXT: 3 1 0.50 * strh w9, [x2], #255
+# CHECK-NEXT: 3 1 0.50 * strh w9, [x2], #1
+# CHECK-NEXT: 3 1 0.50 * strh w10, [x3], #-256
+# CHECK-NEXT: 3 1 0.50 * str w19, [sp], #255
+# CHECK-NEXT: 3 1 0.50 * str w20, [x30], #1
+# CHECK-NEXT: 3 1 0.50 * str w21, [x12], #-256
+# CHECK-NEXT: 3 1 0.50 * str xzr, [x9], #255
+# CHECK-NEXT: 3 1 0.50 * str x2, [x3], #1
+# CHECK-NEXT: 3 1 0.50 * str x19, [x12], #-256
+# CHECK-NEXT: 2 4 0.33 * ldrb w9, [x2], #255
+# CHECK-NEXT: 2 4 0.33 * ldrb w10, [x3], #1
+# CHECK-NEXT: 2 4 0.33 * ldrb w10, [x3], #-256
+# CHECK-NEXT: 2 4 0.33 * ldrh w9, [x2], #255
+# CHECK-NEXT: 2 4 0.33 * ldrh w9, [x2], #1
+# CHECK-NEXT: 2 4 0.33 * ldrh w10, [x3], #-256
+# CHECK-NEXT: 2 4 0.33 * ldr w19, [sp], #255
+# CHECK-NEXT: 2 4 0.33 * ldr w20, [x30], #1
+# CHECK-NEXT: 2 4 0.33 * ldr w21, [x12], #-256
+# CHECK-NEXT: 2 4 0.33 * ldr xzr, [x9], #255
+# CHECK-NEXT: 2 4 0.33 * ldr x2, [x3], #1
+# CHECK-NEXT: 2 4 0.33 * ldr x19, [x12], #-256
+# CHECK-NEXT: 2 4 0.33 * ldrsb xzr, [x9], #255
+# CHECK-NEXT: 2 4 0.33 * ldrsb x2, [x3], #1
+# CHECK-NEXT: 2 4 0.33 * ldrsb x19, [x12], #-256
+# CHECK-NEXT: 2 4 0.33 * ldrsh xzr, [x9], #255
+# CHECK-NEXT: 2 4 0.33 * ldrsh x2, [x3], #1
+# CHECK-NEXT: 2 4 0.33 * ldrsh x19, [x12], #-256
+# CHECK-NEXT: 2 4 0.33 * ldrsw xzr, [x9], #255
+# CHECK-NEXT: 2 4 0.33 * ldrsw x2, [x3], #1
+# CHECK-NEXT: 2 4 0.33 * ldrsw x19, [x12], #-256
+# CHECK-NEXT: 2 4 0.33 * ldrsb wzr, [x9], #255
+# CHECK-NEXT: 2 4 0.33 * ldrsb w2, [x3], #1
+# CHECK-NEXT: 2 4 0.33 * ldrsb w19, [x12], #-256
+# CHECK-NEXT: 2 4 0.33 * ldrsh wzr, [x9], #255
+# CHECK-NEXT: 2 4 0.33 * ldrsh w2, [x3], #1
+# CHECK-NEXT: 2 4 0.33 * ldrsh w19, [x12], #-256
+# CHECK-NEXT: 4 2 0.50 * str b0, [x0], #255
+# CHECK-NEXT: 4 2 0.50 * str b3, [x3], #1
+# CHECK-NEXT: 4 2 0.50 * str b5, [sp], #-256
+# CHECK-NEXT: 4 2 0.50 * str h10, [x10], #255
+# CHECK-NEXT: 4 2 0.50 * str h13, [x23], #1
+# CHECK-NEXT: 4 2 0.50 * str h15, [sp], #-256
+# CHECK-NEXT: 4 2 0.50 * str s20, [x20], #255
+# CHECK-NEXT: 4 2 0.50 * str s23, [x23], #1
+# CHECK-NEXT: 4 2 0.50 * str s25, [x0], #-256
+# CHECK-NEXT: 4 2 0.50 * str d20, [x20], #255
+# CHECK-NEXT: 4 2 0.50 * str d23, [x23], #1
+# CHECK-NEXT: 4 2 0.50 * str d25, [x0], #-256
+# CHECK-NEXT: 3 6 0.33 * ldr b0, [x0], #255
+# CHECK-NEXT: 3 6 0.33 * ldr b3, [x3], #1
+# CHECK-NEXT: 3 6 0.33 * ldr b5, [sp], #-256
+# CHECK-NEXT: 3 6 0.33 * ldr h10, [x10], #255
+# CHECK-NEXT: 3 6 0.33 * ldr h13, [x23], #1
+# CHECK-NEXT: 3 6 0.33 * ldr h15, [sp], #-256
+# CHECK-NEXT: 3 6 0.33 * ldr s20, [x20], #255
+# CHECK-NEXT: 3 6 0.33 * ldr s23, [x23], #1
+# CHECK-NEXT: 3 6 0.33 * ldr s25, [x0], #-256
+# CHECK-NEXT: 3 6 0.33 * ldr d20, [x20], #255
+# CHECK-NEXT: 3 6 0.33 * ldr d23, [x23], #1
+# CHECK-NEXT: 3 6 0.33 * ldr d25, [x0], #-256
+# CHECK-NEXT: 3 6 0.33 * ldr q20, [x1], #255
+# CHECK-NEXT: 3 6 0.33 * ldr q23, [x9], #1
+# CHECK-NEXT: 3 6 0.33 * ldr q25, [x20], #-256
+# CHECK-NEXT: 4 2 0.50 * str q10, [x1], #255
+# CHECK-NEXT: 4 2 0.50 * str q22, [sp], #1
+# CHECK-NEXT: 4 2 0.50 * str q21, [x20], #-256
+# CHECK-NEXT: 2 4 0.33 * ldr x3, [x4, #0]!
+# CHECK-NEXT: 3 1 0.50 * strb w9, [x2, #255]!
+# CHECK-NEXT: 3 1 0.50 * strb w10, [x3, #1]!
+# CHECK-NEXT: 3 1 0.50 * strb w10, [x3, #-256]!
+# CHECK-NEXT: 3 1 0.50 * strh w9, [x2, #255]!
+# CHECK-NEXT: 3 1 0.50 * strh w9, [x2, #1]!
+# CHECK-NEXT: 3 1 0.50 * strh w10, [x3, #-256]!
+# CHECK-NEXT: 3 1 0.50 * str w19, [sp, #255]!
+# CHECK-NEXT: 3 1 0.50 * str w20, [x30, #1]!
+# CHECK-NEXT: 3 1 0.50 * str w21, [x12, #-256]!
+# CHECK-NEXT: 3 1 0.50 * str xzr, [x9, #255]!
+# CHECK-NEXT: 3 1 0.50 * str x2, [x3, #1]!
+# CHECK-NEXT: 3 1 0.50 * str x19, [x12, #-256]!
+# CHECK-NEXT: 2 4 0.33 * ldrb w9, [x2, #255]!
+# CHECK-NEXT: 2 4 0.33 * ldrb w10, [x3, #1]!
+# CHECK-NEXT: 2 4 0.33 * ldrb w10, [x3, #-256]!
+# CHECK-NEXT: 2 4 0.33 * ldrh w9, [x2, #255]!
+# CHECK-NEXT: 2 4 0.33 * ldrh w9, [x2, #1]!
+# CHECK-NEXT: 2 4 0.33 * ldrh w10, [x3, #-256]!
+# CHECK-NEXT: 2 4 0.33 * ldr w19, [sp, #255]!
+# CHECK-NEXT: 2 4 0.33 * ldr w20, [x30, #1]!
+# CHECK-NEXT: 2 4 0.33 * ldr w21, [x12, #-256]!
+# CHECK-NEXT: 2 4 0.33 * ldr xzr, [x9, #255]!
+# CHECK-NEXT: 2 4 0.33 * ldr x2, [x3, #1]!
+# CHECK-NEXT: 2 4 0.33 * ldr x19, [x12, #-256]!
+# CHECK-NEXT: 2 4 0.33 * ldrsb xzr, [x9, #255]!
+# CHECK-NEXT: 2 4 0.33 * ldrsb x2, [x3, #1]!
+# CHECK-NEXT: 2 4 0.33 * ldrsb x19, [x12, #-256]!
+# CHECK-NEXT: 2 4 0.33 * ldrsh xzr, [x9, #255]!
+# CHECK-NEXT: 2 4 0.33 * ldrsh x2, [x3, #1]!
+# CHECK-NEXT: 2 4 0.33 * ldrsh x19, [x12, #-256]!
+# CHECK-NEXT: 2 4 0.33 * ldrsw xzr, [x9, #255]!
+# CHECK-NEXT: 2 4 0.33 * ldrsw x2, [x3, #1]!
+# CHECK-NEXT: 2 4 0.33 * ldrsw x19, [x12, #-256]!
+# CHECK-NEXT: 2 4 0.33 * ldrsb wzr, [x9, #255]!
+# CHECK-NEXT: 2 4 0.33 * ldrsb w2, [x3, #1]!
+# CHECK-NEXT: 2 4 0.33 * ldrsb w19, [x12, #-256]!
+# CHECK-NEXT: 2 4 0.33 * ldrsh wzr, [x9, #255]!
+# CHECK-NEXT: 2 4 0.33 * ldrsh w2, [x3, #1]!
+# CHECK-NEXT: 2 4 0.33 * ldrsh w19, [x12, #-256]!
+# CHECK-NEXT: 4 2 0.50 * str b0, [x0, #255]!
+# CHECK-NEXT: 4 2 0.50 * str b3, [x3, #1]!
+# CHECK-NEXT: 4 2 0.50 * str b5, [sp, #-256]!
+# CHECK-NEXT: 4 2 0.50 * str h10, [x10, #255]!
+# CHECK-NEXT: 4 2 0.50 * str h13, [x23, #1]!
+# CHECK-NEXT: 4 2 0.50 * str h15, [sp, #-256]!
+# CHECK-NEXT: 4 2 0.50 * str s20, [x20, #255]!
+# CHECK-NEXT: 4 2 0.50 * str s23, [x23, #1]!
+# CHECK-NEXT: 4 2 0.50 * str s25, [x0, #-256]!
+# CHECK-NEXT: 4 2 0.50 * str d20, [x20, #255]!
+# CHECK-NEXT: 4 2 0.50 * str d23, [x23, #1]!
+# CHECK-NEXT: 4 2 0.50 * str d25, [x0, #-256]!
+# CHECK-NEXT: 3 6 0.33 * ldr b0, [x0, #255]!
+# CHECK-NEXT: 3 6 0.33 * ldr b3, [x3, #1]!
+# CHECK-NEXT: 3 6 0.33 * ldr b5, [sp, #-256]!
+# CHECK-NEXT: 3 6 0.33 * ldr h10, [x10, #255]!
+# CHECK-NEXT: 3 6 0.33 * ldr h13, [x23, #1]!
+# CHECK-NEXT: 3 6 0.33 * ldr h15, [sp, #-256]!
+# CHECK-NEXT: 3 6 0.33 * ldr s20, [x20, #255]!
+# CHECK-NEXT: 3 6 0.33 * ldr s23, [x23, #1]!
+# CHECK-NEXT: 3 6 0.33 * ldr s25, [x0, #-256]!
+# CHECK-NEXT: 3 6 0.33 * ldr d20, [x20, #255]!
+# CHECK-NEXT: 3 6 0.33 * ldr d23, [x23, #1]!
+# CHECK-NEXT: 3 6 0.33 * ldr d25, [x0, #-256]!
+# CHECK-NEXT: 3 6 0.33 * ldr q20, [x1, #255]!
+# CHECK-NEXT: 3 6 0.33 * ldr q23, [x9, #1]!
+# CHECK-NEXT: 3 6 0.33 * ldr q25, [x20, #-256]!
+# CHECK-NEXT: 4 2 0.50 * str q10, [x1, #255]!
+# CHECK-NEXT: 4 2 0.50 * str q22, [sp, #1]!
+# CHECK-NEXT: 4 2 0.50 * str q21, [x20, #-256]!
+# CHECK-NEXT: 2 1 0.50 * sttrb w9, [sp]
+# CHECK-NEXT: 2 1 0.50 * sttrh wzr, [x12, #255]
+# CHECK-NEXT: 2 1 0.50 * sttr w16, [x0, #-256]
+# CHECK-NEXT: 2 1 0.50 * sttr x28, [x14, #1]
+# CHECK-NEXT: 1 4 0.33 * ldtrb w1, [x20, #255]
+# CHECK-NEXT: 1 4 0.33 * ldtrh w20, [x1, #255]
+# CHECK-NEXT: 1 4 0.33 * ldtr w12, [sp, #255]
+# CHECK-NEXT: 1 4 0.33 * ldtr xzr, [x12, #255]
+# CHECK-NEXT: 1 4 0.33 * ldtrsb x9, [x7, #-256]
+# CHECK-NEXT: 1 4 0.33 * ldtrsh x17, [x19, #-256]
+# CHECK-NEXT: 1 4 0.33 * ldtrsw x20, [x15, #-256]
+# CHECK-NEXT: 1 4 0.33 * ldtrsb w19, [x1, #-256]
+# CHECK-NEXT: 1 4 0.33 * ldtrsh w15, [x21, #-256]
+# CHECK-NEXT: 1 4 0.33 * ldr x4, [x29]
+# CHECK-NEXT: 1 4 0.33 * ldr x30, [x12, #32760]
+# CHECK-NEXT: 1 4 0.33 * ldr x20, [sp, #8]
+# CHECK-NEXT: 1 4 0.33 * ldr xzr, [sp]
+# CHECK-NEXT: 1 4 0.33 * ldr w2, [sp]
+# CHECK-NEXT: 1 4 0.33 * ldr w17, [sp, #16380]
+# CHECK-NEXT: 1 4 0.33 * ldr w13, [x2, #4]
+# CHECK-NEXT: 1 4 0.33 * ldrsw x2, [x5, #4]
+# CHECK-NEXT: 1 4 0.33 * ldrsw x23, [sp, #16380]
+# CHECK-NEXT: 1 4 0.33 * ldrh w2, [x4]
+# CHECK-NEXT: 1 4 0.33 * ldrsh w23, [x6, #8190]
+# CHECK-NEXT: 1 4 0.33 * ldrsh wzr, [sp, #2]
+# CHECK-NEXT: 1 4 0.33 * ldrsh x29, [x2, #2]
+# CHECK-NEXT: 1 4 0.33 * ldrb w26, [x3, #121]
+# CHECK-NEXT: 1 4 0.33 * ldrb w12, [x2]
+# CHECK-NEXT: 1 4 0.33 * ldrsb w27, [sp, #4095]
+# CHECK-NEXT: 1 4 0.33 * ldrsb xzr, [x15]
+# CHECK-NEXT: 2 1 0.50 * str x30, [sp]
+# CHECK-NEXT: 2 1 0.50 * str w20, [x4, #16380]
+# CHECK-NEXT: 2 1 0.50 * strh w17, [sp, #8190]
+# CHECK-NEXT: 2 1 0.50 * strb w23, [x3, #4095]
+# CHECK-NEXT: 2 1 0.50 * strb wzr, [x2]
+# CHECK-NEXT: 1 6 0.33 * ldr b31, [sp, #4095]
+# CHECK-NEXT: 1 6 0.33 * ldr h20, [x2, #8190]
+# CHECK-NEXT: 1 6 0.33 * ldr s10, [x19, #16380]
+# CHECK-NEXT: 1 6 0.33 * ldr d3, [x10, #32760]
+# CHECK-NEXT: 2 2 0.50 * str q12, [sp, #65520]
+# CHECK-NEXT: 1 6 0.33 * ldr h3, [sp, x5]
+# CHECK-NEXT: 1 6 0.33 * ldr h9, [x27, x6]
+# CHECK-NEXT: 2 7 0.33 * ldr h10, [x30, x7, lsl #1]
+# CHECK-NEXT: 2 2 0.50 * str h11, [x29, x3, sxtx]
+# CHECK-NEXT: 2 2 0.50 * str h12, [x28, xzr, sxtx]
+# CHECK-NEXT: 3 2 0.50 * str h13, [x27, x5, sxtx #1]
+# CHECK-NEXT: 1 6 0.33 * ldr h14, [x26, w6, uxtw]
+# CHECK-NEXT: 1 6 0.33 * ldr h15, [x25, w7, uxtw]
+# CHECK-NEXT: 2 7 0.33 * ldr h16, [x24, w8, uxtw #1]
+# CHECK-NEXT: 1 6 0.33 * ldr h17, [x23, w9, sxtw]
+# CHECK-NEXT: 2 2 0.50 * str h18, [x22, w10, sxtw]
+# CHECK-NEXT: 2 7 0.33 * ldr h19, [x21, wzr, sxtw #1]
+# CHECK-NEXT: 1 4 0.33 * ldrb w3, [sp, x5]
+# CHECK-NEXT: 1 4 0.33 * ldrb w9, [x27, x6]
+# CHECK-NEXT: 1 4 0.33 * ldrsb w10, [x30, x7]
+# CHECK-NEXT: 1 4 0.33 * ldrb w11, [x29, x3, sxtx]
+# CHECK-NEXT: 2 1 0.50 * strb w12, [x28, xzr, sxtx]
+# CHECK-NEXT: 1 4 0.33 * ldrb w14, [x26, w6, uxtw]
+# CHECK-NEXT: 1 4 0.33 * ldrsb w15, [x25, w7, uxtw]
+# CHECK-NEXT: 1 4 0.33 * ldrb w17, [x23, w9, sxtw]
+# CHECK-NEXT: 1 4 0.33 * ldrsb x18, [x22, w10, sxtw]
+# CHECK-NEXT: 1 4 0.33 * ldrsh w3, [sp, x5]
+# CHECK-NEXT: 1 4 0.33 * ldrsh w9, [x27, x6]
+# CHECK-NEXT: 1 4 0.33 * ldrh w10, [x30, x7, lsl #1]
+# CHECK-NEXT: 2 1 0.50 * strh w11, [x29, x3, sxtx]
+# CHECK-NEXT: 1 4 0.33 * ldrh w12, [x28, xzr, sxtx]
+# CHECK-NEXT: 1 4 0.33 * ldrsh x13, [x27, x5, sxtx #1]
+# CHECK-NEXT: 1 4 0.33 * ldrh w14, [x26, w6, uxtw]
+# CHECK-NEXT: 1 4 0.33 * ldrh w15, [x25, w7, uxtw]
+# CHECK-NEXT: 1 4 0.33 * ldrsh w16, [x24, w8, uxtw #1]
+# CHECK-NEXT: 1 4 0.33 * ldrh w17, [x23, w9, sxtw]
+# CHECK-NEXT: 1 4 0.33 * ldrh w18, [x22, w10, sxtw]
+# CHECK-NEXT: 2 1 0.50 * strh w19, [x21, wzr, sxtw #1]
+# CHECK-NEXT: 1 4 0.33 * ldr w3, [sp, x5]
+# CHECK-NEXT: 1 6 0.33 * ldr s9, [x27, x6]
+# CHECK-NEXT: 1 4 0.33 * ldr w10, [x30, x7, lsl #2]
+# CHECK-NEXT: 1 4 0.33 * ldr w11, [x29, x3, sxtx]
+# CHECK-NEXT: 2 2 0.50 * str s12, [x28, xzr, sxtx]
+# CHECK-NEXT: 2 1 0.50 * str w13, [x27, x5, sxtx #2]
+# CHECK-NEXT: 2 1 0.50 * str w14, [x26, w6, uxtw]
+# CHECK-NEXT: 1 4 0.33 * ldr w15, [x25, w7, uxtw]
+# CHECK-NEXT: 1 4 0.33 * ldr w16, [x24, w8, uxtw #2]
+# CHECK-NEXT: 1 4 0.33 * ldrsw x17, [x23, w9, sxtw]
+# CHECK-NEXT: 1 4 0.33 * ldr w18, [x22, w10, sxtw]
+# CHECK-NEXT: 1 4 0.33 * ldrsw x19, [x21, wzr, sxtw #2]
+# CHECK-NEXT: 1 4 0.33 * ldr x3, [sp, x5]
+# CHECK-NEXT: 2 1 0.50 * str x9, [x27, x6]
+# CHECK-NEXT: 1 6 0.33 * ldr d10, [x30, x7, lsl #3]
+# CHECK-NEXT: 2 1 0.50 * str x11, [x29, x3, sxtx]
+# CHECK-NEXT: 1 4 0.33 * ldr x12, [x28, xzr, sxtx]
+# CHECK-NEXT: 1 4 0.33 * ldr x13, [x27, x5, sxtx #3]
+# CHECK-NEXT: 1 4 0.33 U prfm pldl1keep, [x26, w6, uxtw]
+# CHECK-NEXT: 1 4 0.33 * ldr x15, [x25, w7, uxtw]
+# CHECK-NEXT: 1 4 0.33 * ldr x16, [x24, w8, uxtw #3]
+# CHECK-NEXT: 1 4 0.33 * ldr x17, [x23, w9, sxtw]
+# CHECK-NEXT: 1 4 0.33 * ldr x18, [x22, w10, sxtw]
+# CHECK-NEXT: 2 2 0.50 * str d19, [x21, wzr, sxtw #3]
+# CHECK-NEXT: 1 6 0.33 * ldr q3, [sp, x5]
+# CHECK-NEXT: 1 6 0.33 * ldr q9, [x27, x6]
+# CHECK-NEXT: 2 7 0.33 * ldr q10, [x30, x7, lsl #4]
+# CHECK-NEXT: 2 2 0.50 * str q11, [x29, x3, sxtx]
+# CHECK-NEXT: 2 2 0.50 * str q12, [x28, xzr, sxtx]
+# CHECK-NEXT: 3 2 0.50 * str q13, [x27, x5, sxtx #4]
+# CHECK-NEXT: 1 6 0.33 * ldr q14, [x26, w6, uxtw]
+# CHECK-NEXT: 1 6 0.33 * ldr q15, [x25, w7, uxtw]
+# CHECK-NEXT: 2 7 0.33 * ldr q16, [x24, w8, uxtw #4]
+# CHECK-NEXT: 1 6 0.33 * ldr q17, [x23, w9, sxtw]
+# CHECK-NEXT: 2 2 0.50 * str q18, [x22, w10, sxtw]
+# CHECK-NEXT: 2 7 0.33 * ldr q19, [x21, wzr, sxtw #4]
+# CHECK-NEXT: 2 4 0.33 * ldp w3, w5, [sp]
+# CHECK-NEXT: 2 1 0.50 * stp wzr, w9, [sp, #252]
+# CHECK-NEXT: 2 4 0.33 * ldp w2, wzr, [sp, #-256]
+# CHECK-NEXT: 2 4 0.33 * ldp w9, w10, [sp, #4]
+# CHECK-NEXT: 5 5 1.00 * ldpsw x9, x10, [sp, #4]
+# CHECK-NEXT: 5 5 1.00 * ldpsw x9, x10, [x2, #-256]
+# CHECK-NEXT: 5 5 1.00 * ldpsw x20, x30, [sp, #252]
+# CHECK-NEXT: 2 4 0.33 * ldp x21, x29, [x2, #504]
+# CHECK-NEXT: 2 4 0.33 * ldp x22, x23, [x3, #-512]
+# CHECK-NEXT: 2 4 0.33 * ldp x24, x25, [x4, #8]
+# CHECK-NEXT: 2 6 0.33 * ldp s29, s28, [sp, #252]
+# CHECK-NEXT: 2 2 0.50 * stp s27, s26, [sp, #-256]
+# CHECK-NEXT: 2 6 0.33 * ldp s1, s2, [x3, #44]
+# CHECK-NEXT: 2 2 0.50 * stp d3, d5, [x9, #504]
+# CHECK-NEXT: 2 2 0.50 * stp d7, d11, [x10, #-512]
+# CHECK-NEXT: 2 6 0.33 * ldp d2, d3, [x30, #-8]
+# CHECK-NEXT: 3 2 1.00 * stp q3, q5, [sp]
+# CHECK-NEXT: 3 2 1.00 * stp q17, q19, [sp, #1008]
+# CHECK-NEXT: 3 6 0.67 * ldp q23, q29, [x1, #-1024]
+# CHECK-NEXT: 3 4 0.33 * ldp w3, w5, [sp], #0
+# CHECK-NEXT: 3 1 0.50 * stp wzr, w9, [sp], #252
+# CHECK-NEXT: 3 4 0.33 * ldp w2, wzr, [sp], #-256
+# CHECK-NEXT: 3 4 0.33 * ldp w9, w10, [sp], #4
+# CHECK-NEXT: 6 5 1.00 * ldpsw x9, x10, [sp], #4
+# CHECK-NEXT: 6 5 1.00 * ldpsw x9, x10, [x2], #-256
+# CHECK-NEXT: 6 5 1.00 * ldpsw x20, x30, [sp], #252
+# CHECK-NEXT: 3 4 0.33 * ldp x21, x29, [x2], #504
+# CHECK-NEXT: 3 4 0.33 * ldp x22, x23, [x3], #-512
+# CHECK-NEXT: 3 4 0.33 * ldp x24, x25, [x4], #8
+# CHECK-NEXT: 4 6 0.33 * ldp s29, s28, [sp], #252
+# CHECK-NEXT: 4 2 0.50 * stp s27, s26, [sp], #-256
+# CHECK-NEXT: 4 6 0.33 * ldp s1, s2, [x3], #44
+# CHECK-NEXT: 4 2 0.50 * stp d3, d5, [x9], #504
+# CHECK-NEXT: 4 2 0.50 * stp d7, d11, [x10], #-512
+# CHECK-NEXT: 4 6 0.33 * ldp d2, d3, [x30], #-8
+# CHECK-NEXT: 4 2 1.00 * stp q3, q5, [sp], #0
+# CHECK-NEXT: 4 2 1.00 * stp q17, q19, [sp], #1008
+# CHECK-NEXT: 6 6 0.67 * ldp q23, q29, [x1], #-1024
+# CHECK-NEXT: 3 4 0.33 * ldp w3, w5, [sp, #0]!
+# CHECK-NEXT: 3 1 0.50 * stp wzr, w9, [sp, #252]!
+# CHECK-NEXT: 3 4 0.33 * ldp w2, wzr, [sp, #-256]!
+# CHECK-NEXT: 3 4 0.33 * ldp w9, w10, [sp, #4]!
+# CHECK-NEXT: 6 5 1.00 * ldpsw x9, x10, [sp, #4]!
+# CHECK-NEXT: 6 5 1.00 * ldpsw x9, x10, [x2, #-256]!
+# CHECK-NEXT: 6 5 1.00 * ldpsw x20, x30, [sp, #252]!
+# CHECK-NEXT: 3 4 0.33 * ldp x21, x29, [x2, #504]!
+# CHECK-NEXT: 3 4 0.33 * ldp x22, x23, [x3, #-512]!
+# CHECK-NEXT: 3 4 0.33 * ldp x24, x25, [x4, #8]!
+# CHECK-NEXT: 4 6 0.33 * ldp s29, s28, [sp, #252]!
+# CHECK-NEXT: 4 2 0.50 * stp s27, s26, [sp, #-256]!
+# CHECK-NEXT: 4 6 0.33 * ldp s1, s2, [x3, #44]!
+# CHECK-NEXT: 4 2 0.50 * stp d3, d5, [x9, #504]!
+# CHECK-NEXT: 4 2 0.50 * stp d7, d11, [x10, #-512]!
+# CHECK-NEXT: 4 6 0.33 * ldp d2, d3, [x30, #-8]!
+# CHECK-NEXT: 5 2 1.00 * stp q3, q5, [sp, #0]!
+# CHECK-NEXT: 5 2 1.00 * stp q17, q19, [sp, #1008]!
+# CHECK-NEXT: 6 6 0.67 * ldp q23, q29, [x1, #-1024]!
+# CHECK-NEXT: 2 4 0.33 * ldnp w3, w5, [sp]
+# CHECK-NEXT: 2 1 0.50 * stnp wzr, w9, [sp, #252]
+# CHECK-NEXT: 2 4 0.33 * ldnp w2, wzr, [sp, #-256]
+# CHECK-NEXT: 2 4 0.33 * ldnp w9, w10, [sp, #4]
+# CHECK-NEXT: 2 4 0.33 * ldnp x21, x29, [x2, #504]
+# CHECK-NEXT: 2 4 0.33 * ldnp x22, x23, [x3, #-512]
+# CHECK-NEXT: 2 4 0.33 * ldnp x24, x25, [x4, #8]
+# CHECK-NEXT: 2 6 0.33 * ldnp s29, s28, [sp, #252]
+# CHECK-NEXT: 2 2 0.50 * stnp s27, s26, [sp, #-256]
+# CHECK-NEXT: 2 6 0.33 * ldnp s1, s2, [x3, #44]
+# CHECK-NEXT: 2 2 0.50 * stnp d3, d5, [x9, #504]
+# CHECK-NEXT: 2 2 0.50 * stnp d7, d11, [x10, #-512]
+# CHECK-NEXT: 2 6 0.33 * ldnp d2, d3, [x30, #-8]
+# CHECK-NEXT: 3 2 1.00 * stnp q3, q5, [sp]
+# CHECK-NEXT: 3 2 1.00 * stnp q17, q19, [sp, #1008]
+# CHECK-NEXT: 3 6 0.67 * ldnp q23, q29, [x1, #-1024]
+# CHECK-NEXT: 1 1 0.13 mov w3, #983055
+# CHECK-NEXT: 1 1 0.13 mov x10, #-6148914691236517206
+# CHECK-NEXT: 1 1 0.25 ands w4, w4, #0xf000f
+# CHECK-NEXT: 1 1 0.25 ands x11, x11, #0xaaaaaaaaaaaaaaaa
+# CHECK-NEXT: 1 1 0.13 and w12, w23, w21
+# CHECK-NEXT: 1 1 0.13 and w16, w15, w1, lsl #1
+# CHECK-NEXT: 1 1 0.13 and w9, w4, w10, lsl #31
+# CHECK-NEXT: 1 1 0.13 and w3, w30, w11
+# CHECK-NEXT: 1 1 0.13 and x3, x5, x7, lsl #63
+# CHECK-NEXT: 1 1 0.13 and x5, x14, x19, asr #4
+# CHECK-NEXT: 1 1 0.13 and w3, w17, w19, ror #31
+# CHECK-NEXT: 1 1 0.13 and w0, w2, wzr, lsr #17
+# CHECK-NEXT: 1 1 0.13 and w3, w30, w11, asr #2
+# CHECK-NEXT: 1 1 0.13 and xzr, x4, x26
+# CHECK-NEXT: 1 1 0.13 and w3, wzr, w20, ror #2
+# CHECK-NEXT: 1 1 0.13 and x7, x20, xzr, asr #63
+# CHECK-NEXT: 1 1 0.13 bic x13, x20, x14, lsl #47
+# CHECK-NEXT: 1 1 0.13 bic w2, w7, w9
+# CHECK-NEXT: 1 1 0.13 orr w2, w7, w0, asr #31
+# CHECK-NEXT: 1 1 0.13 orr x8, x9, x10, lsl #12
+# CHECK-NEXT: 1 1 0.13 orn x3, x5, x7, asr #2
+# CHECK-NEXT: 1 1 0.13 orn w2, w5, w29
+# CHECK-NEXT: 1 2 0.50 ands w7, wzr, w9, lsl #1
+# CHECK-NEXT: 1 2 0.50 ands x3, x5, x20, ror #63
+# CHECK-NEXT: 1 1 0.25 bics w3, w5, w7
+# CHECK-NEXT: 1 2 0.50 bics x3, xzr, x3, lsl #1
+# CHECK-NEXT: 1 2 0.50 tst w3, w7, lsl #31
+# CHECK-NEXT: 1 2 0.50 tst x2, x20, asr #2
+# CHECK-NEXT: 1 0 0.10 mov x3, x6
+# CHECK-NEXT: 1 0 0.10 mov x3, xzr
+# CHECK-NEXT: 1 0 0.10 mov wzr, w2
+# CHECK-NEXT: 1 0 0.10 mov w3, w5
+# CHECK-NEXT: 1 1 0.13 movz w2, #0, lsl #16
+# CHECK-NEXT: 1 1 0.13 mov w2, #-1235
+# CHECK-NEXT: 1 1 0.13 mov x2, #5299989643264
+# CHECK-NEXT: 1 0 0.10 mov x2, #0
+# CHECK-NEXT: 1 1 0.13 movk w3, #0
+# CHECK-NEXT: 1 1 0.13 movz x4, #0, lsl #16
+# CHECK-NEXT: 1 1 0.13 movk w5, #0, lsl #16
+# CHECK-NEXT: 1 1 0.13 movz x6, #0, lsl #32
+# CHECK-NEXT: 1 1 0.13 movk x7, #0, lsl #32
+# CHECK-NEXT: 1 1 0.13 movz x8, #0, lsl #48
+# CHECK-NEXT: 1 1 0.13 movk x9, #0, lsl #48
+# CHECK-NEXT: 1 1 0.13 adr x2, #1600
+# CHECK-NEXT: 1 1 0.13 adrp x21, #6553600
+# CHECK-NEXT: 1 1 0.13 adr x0, #262144
+# CHECK-NEXT: 1 1 0.33 tbz x12, #62, #0
+# CHECK-NEXT: 1 1 0.33 tbz x12, #62, #4
+# CHECK-NEXT: 1 1 0.33 tbz x12, #62, #-32768
+# CHECK-NEXT: 1 1 0.33 tbnz x12, #60, #32764
+# CHECK-NEXT: 1 1 0.33 b #4
+# CHECK-NEXT: 1 1 0.33 b #-4
+# CHECK-NEXT: 1 1 0.33 b #134217724
+# CHECK-NEXT: 1 1 0.33 br x20
+# CHECK-NEXT: 2 1 0.33 blr xzr
+# CHECK-NEXT: 1 1 0.33 U ret x10
+# CHECK-NEXT: 1 1 0.33 U ret
+# CHECK-NEXT: 1 1 0.33 U eret
+# CHECK-NEXT: 1 1 0.33 U drps
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - V3UnitB
+# CHECK-NEXT: [0.1] - V3UnitB
+# CHECK-NEXT: [0.2] - V3UnitB
+# CHECK-NEXT: [1.0] - V3UnitD
+# CHECK-NEXT: [1.1] - V3UnitD
+# CHECK-NEXT: [2.0] - V3UnitFlg
+# CHECK-NEXT: [2.1] - V3UnitFlg
+# CHECK-NEXT: [2.2] - V3UnitFlg
+# CHECK-NEXT: [2.3] - V3UnitFlg
+# CHECK-NEXT: [3.0] - V3UnitL12
+# CHECK-NEXT: [3.1] - V3UnitL12
+# CHECK-NEXT: [4] - V3UnitLS0
+# CHECK-NEXT: [5] - V3UnitM0
+# CHECK-NEXT: [6] - V3UnitM1
+# CHECK-NEXT: [7] - V3UnitS0
+# CHECK-NEXT: [8] - V3UnitS1
+# CHECK-NEXT: [9] - V3UnitS2
+# CHECK-NEXT: [10] - V3UnitS3
+# CHECK-NEXT: [11] - V3UnitS4
+# CHECK-NEXT: [12] - V3UnitS5
+# CHECK-NEXT: [13] - V3UnitST1
+# CHECK-NEXT: [14] - V3UnitV0
+# CHECK-NEXT: [15] - V3UnitV1
+# CHECK-NEXT: [16] - V3UnitV2
+# CHECK-NEXT: [17] - V3UnitV3
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] [17]
+# CHECK-NEXT: 7.33 7.33 7.33 33.00 33.00 40.50 40.50 40.50 40.50 99.00 99.00 165.00 280.25 165.25 77.92 77.92 77.92 77.92 77.92 77.92 66.00 180.00 150.00 20.00 10.00
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] [17] Instructions:
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - add w2, w3, #4095
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - add w30, w29, #1, lsl #12
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - add w13, w5, #4095, lsl #12
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - add x5, x7, #1638
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - add w20, wsp, #801
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - add wsp, wsp, #1104
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - add wsp, w30, #4084
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - add x0, x24, #291
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - add x3, x24, #4095, lsl #12
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - add x8, sp, #1074
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - add sp, x29, #3816
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - sub w0, wsp, #4077
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - sub w4, w20, #546, lsl #12
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - sub sp, sp, #288
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - sub wsp, w19, #16
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - adds w13, w23, #291, lsl #12
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cmn w2, #4095
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - adds w20, wsp, #0
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cmn x3, #1, lsl #12
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cmp sp, #20, lsl #12
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cmp x30, #4095
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - subs x4, sp, #3822
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cmn w3, #291, lsl #12
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cmn wsp, #1365
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cmn sp, #1092, lsl #12
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - mov sp, x30
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - mov wsp, w20
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - mov x11, sp
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - mov w24, wsp
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - add w3, w5, w7
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - add wzr, w3, w5
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - add w20, wzr, w4
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - add w4, w6, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - add w11, w13, w15
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - add w9, w3, wzr, lsl #10
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - add w17, w29, w20, lsl #31
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - add w21, w22, w23, lsr #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - add w24, w25, w26, lsr #18
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - add w27, w28, w29, lsr #31
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - add w2, w3, w4, asr #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - add w5, w6, w7, asr #21
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - add w8, w9, w10, asr #31
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - add x3, x5, x7
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - add xzr, x3, x5
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - add x20, xzr, x4
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - add x4, x6, xzr
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - add x11, x13, x15
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - add x9, x3, xzr, lsl #10
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - add x17, x29, x20, lsl #63
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - add x21, x22, x23, lsr #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - add x24, x25, x26, lsr #18
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - add x27, x28, x29, lsr #63
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - add x2, x3, x4, asr #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - add x5, x6, x7, asr #21
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - add x8, x9, x10, asr #63
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - adds w3, w5, w7
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cmn w3, w5
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - adds w20, wzr, w4
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - adds w4, w6, wzr
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - adds w11, w13, w15
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - adds w9, w3, wzr, lsl #10
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - adds w17, w29, w20, lsl #31
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - adds w21, w22, w23, lsr #0
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - adds w24, w25, w26, lsr #18
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - adds w27, w28, w29, lsr #31
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - adds w2, w3, w4, asr #0
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - adds w5, w6, w7, asr #21
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - adds w8, w9, w10, asr #31
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - adds x3, x5, x7
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cmn x3, x5
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - adds x20, xzr, x4
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - adds x4, x6, xzr
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - adds x11, x13, x15
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - adds x9, x3, xzr, lsl #10
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - adds x17, x29, x20, lsl #63
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - adds x21, x22, x23, lsr #0
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - adds x24, x25, x26, lsr #18
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - adds x27, x28, x29, lsr #63
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - adds x2, x3, x4, asr #0
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - adds x5, x6, x7, asr #21
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - adds x8, x9, x10, asr #63
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - sub w3, w5, w7
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - sub wzr, w3, w5
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - sub w4, w6, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - sub w11, w13, w15
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sub w9, w3, wzr, lsl #10
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sub w17, w29, w20, lsl #31
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sub w21, w22, w23, lsr #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sub w24, w25, w26, lsr #18
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sub w27, w28, w29, lsr #31
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sub w2, w3, w4, asr #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sub w5, w6, w7, asr #21
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sub w8, w9, w10, asr #31
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - sub x3, x5, x7
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - sub xzr, x3, x5
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - sub x4, x6, xzr
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - sub x11, x13, x15
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sub x9, x3, xzr, lsl #10
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sub x17, x29, x20, lsl #63
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sub x21, x22, x23, lsr #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sub x24, x25, x26, lsr #18
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sub x27, x28, x29, lsr #63
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sub x2, x3, x4, asr #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sub x5, x6, x7, asr #21
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sub x8, x9, x10, asr #63
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - subs w3, w5, w7
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cmp w3, w5
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - subs w4, w6, wzr
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - subs w11, w13, w15
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - subs w9, w3, wzr, lsl #10
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - subs w17, w29, w20, lsl #31
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - subs w21, w22, w23, lsr #0
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - subs w24, w25, w26, lsr #18
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - subs w27, w28, w29, lsr #31
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - subs w2, w3, w4, asr #0
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - subs w5, w6, w7, asr #21
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - subs w8, w9, w10, asr #31
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - subs x3, x5, x7
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cmp x3, x5
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - subs x4, x6, xzr
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - subs x11, x13, x15
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - subs x9, x3, xzr, lsl #10
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - subs x17, x29, x20, lsl #63
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - subs x21, x22, x23, lsr #0
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - subs x24, x25, x26, lsr #18
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - subs x27, x28, x29, lsr #63
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - subs x2, x3, x4, asr #0
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - subs x5, x6, x7, asr #21
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - subs x8, x9, x10, asr #63
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cmn wzr, w4
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cmn w5, wzr
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cmn w6, w7
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - cmn w8, w9, lsl #15
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - cmn w10, w11, lsl #31
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - cmn w12, w13, lsr #0
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - cmn w14, w15, lsr #21
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - cmn w16, w17, lsr #31
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - cmn w18, w19, asr #0
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - cmn w20, w21, asr #22
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - cmn w22, w23, asr #31
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cmn x0, x3
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cmn xzr, x4
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cmn x5, xzr
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cmn x6, x7
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - cmn x8, x9, lsl #15
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - cmn x10, x11, lsl #63
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - cmn x12, x13, lsr #0
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - cmn x14, x15, lsr #41
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - cmn x16, x17, lsr #63
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - cmn x18, x19, asr #0
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - cmn x20, x21, asr #55
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - cmn x22, x23, asr #63
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cmp w0, w3
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cmp wzr, w4
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cmp w5, wzr
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cmp w6, w7
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - cmp w8, w9, lsl #15
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - cmp w10, w11, lsl #31
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - cmp w12, w13, lsr #0
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - cmp w14, w15, lsr #21
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - cmp w18, w19, asr #0
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - cmp w20, w21, asr #22
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - cmp w22, w23, asr #31
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cmp x0, x3
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cmp xzr, x4
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cmp x5, xzr
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cmp x6, x7
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - cmp x8, x9, lsl #15
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - cmp x10, x11, lsl #63
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - cmp x12, x13, lsr #0
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - cmp x14, x15, lsr #41
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - cmp x16, x17, lsr #63
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - cmp x18, x19, asr #0
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - cmp x20, x21, asr #55
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - cmp x22, x23, asr #63
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cmp wzr, w0
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cmp xzr, x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - adc w29, w27, w25
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - adc wzr, w3, w4
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - adc w9, wzr, w10
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - adc w20, w0, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - adc x29, x27, x25
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - adc xzr, x3, x4
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - adc x9, xzr, x10
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - adc x20, x0, xzr
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - adcs w29, w27, w25
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - adcs wzr, w3, w4
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - adcs w9, wzr, w10
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - adcs w20, w0, wzr
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - adcs x29, x27, x25
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - adcs xzr, x3, x4
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - adcs x9, xzr, x10
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - adcs x20, x0, xzr
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - sbc w29, w27, w25
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - sbc wzr, w3, w4
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ngc w9, w10
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - sbc w20, w0, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - sbc x29, x27, x25
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - sbc xzr, x3, x4
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ngc x9, x10
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - sbc x20, x0, xzr
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - sbcs w29, w27, w25
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - sbcs wzr, w3, w4
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ngcs w9, w10
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - sbcs w20, w0, wzr
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - sbcs x29, x27, x25
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - sbcs xzr, x3, x4
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ngcs x9, x10
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - sbcs x20, x0, xzr
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ngc w3, w12
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ngc wzr, w9
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ngc w23, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ngc x29, x30
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ngc xzr, x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ngc x0, xzr
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ngcs w3, w12
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ngcs wzr, w9
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ngcs w23, wzr
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ngcs x29, x30
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ngcs xzr, x0
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ngcs x0, xzr
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - sbfx x1, x2, #3, #2
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - asr x3, x4, #63
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - asr wzr, wzr, #31
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - sbfx w12, w9, #0, #1
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ubfiz x4, x5, #52, #11
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ubfx xzr, x4, #0, #1
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ubfiz x4, xzr, #1, #6
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - lsr x5, x6, #12
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - bfi x4, x5, #52, #11
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - bfxil xzr, x4, #0, #1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - bfc x4, #1, #6
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - bfxil x5, x6, #12, #52
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - sxtb w1, w2
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - sxtb xzr, w3
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - sxth w9, w10
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - sxth x0, w1
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - sxtw x3, w30
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - uxtb w1, w2
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - uxth w9, w10
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ubfx x3, x30, #0, #32
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - asr w3, w2, #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - asr w9, w10, #31
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - asr x20, x21, #63
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - asr w1, wzr, #3
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - lsr w3, w2, #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - lsr w9, w10, #31
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - lsr x20, x21, #63
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - lsr wzr, wzr, #3
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - lsr w3, w2, #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - lsl w9, w10, #31
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - lsl x20, x21, #63
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - lsl w1, wzr, #3
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - sbfx w9, w10, #0, #1
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - sbfiz x2, x3, #63, #1
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - asr x19, x20, #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - sbfiz x9, x10, #5, #59
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - asr w9, w10, #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - sbfiz w11, w12, #31, #1
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - sbfiz w13, w14, #29, #3
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - sbfiz xzr, xzr, #10, #11
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - sbfx w9, w10, #0, #1
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - asr x2, x3, #63
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - asr x19, x20, #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - asr x9, x10, #5
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - asr w9, w10, #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - asr w11, w12, #31
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - asr w13, w14, #29
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - sbfx xzr, xzr, #10, #11
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - bfxil w9, w10, #0, #1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - bfi x2, x3, #63, #1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - bfxil x19, x20, #0, #64
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - bfi x9, x10, #5, #59
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - bfxil w9, w10, #0, #32
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - bfi w11, w12, #31, #1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - bfi w13, w14, #29, #3
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - bfc xzr, #10, #11
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - bfxil w9, w10, #0, #1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - bfxil x2, x3, #63, #1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - bfxil x19, x20, #0, #64
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - bfxil x9, x10, #5, #59
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - bfxil w9, w10, #0, #32
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - bfxil w11, w12, #31, #1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - bfxil w13, w14, #29, #3
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - bfxil xzr, xzr, #10, #11
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ubfx w9, w10, #0, #1
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - lsl x2, x3, #63
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - lsr x19, x20, #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - lsl x9, x10, #5
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - lsr w9, w10, #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - lsl w11, w12, #31
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - lsl w13, w14, #29
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ubfiz xzr, xzr, #10, #11
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ubfx w9, w10, #0, #1
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - lsr x2, x3, #63
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - lsr x19, x20, #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - lsr x9, x10, #5
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - lsr w9, w10, #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - lsr w11, w12, #31
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - lsr w13, w14, #29
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ubfx xzr, xzr, #10, #11
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - - - cbz w5, #4
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - - - cbz x5, #0
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - - - cbnz x2, #-4
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - - - cbnz x26, #1048572
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - - - cbz wzr, #0
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - - - cbnz xzr, #0
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - - - b.ne #4
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - - - b.ge #1048572
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - - - b.ge #-4
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ccmp w1, #31, #0, eq
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ccmp w3, #0, #15, hs
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ccmp wzr, #15, #13, hs
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ccmp x9, #31, #0, le
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ccmp x3, #0, #15, gt
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ccmp xzr, #5, #7, ne
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ccmn w1, #31, #0, eq
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ccmn w3, #0, #15, hs
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ccmn wzr, #15, #13, hs
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ccmn x9, #31, #0, le
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ccmn x3, #0, #15, gt
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ccmn xzr, #5, #7, ne
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ccmp w1, wzr, #0, eq
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ccmp w3, w0, #15, hs
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ccmp wzr, w15, #13, hs
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ccmp x9, xzr, #0, le
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ccmp x3, x0, #15, gt
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ccmp xzr, x5, #7, ne
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ccmn w1, wzr, #0, eq
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ccmn w3, w0, #15, hs
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ccmn wzr, w15, #13, hs
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ccmn x9, xzr, #0, le
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ccmn x3, x0, #15, gt
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ccmn xzr, x5, #7, ne
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csel w1, w0, w19, ne
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csel wzr, w5, w9, eq
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csel w9, wzr, w30, gt
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csel w1, w28, wzr, mi
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csel x19, x23, x29, lt
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csel xzr, x3, x4, ge
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csel x5, xzr, x6, hs
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csel x7, x8, xzr, lo
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csinc w1, w0, w19, ne
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csinc wzr, w5, w9, eq
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csinc w9, wzr, w30, gt
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csinc w1, w28, wzr, mi
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csinc x19, x23, x29, lt
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csinc xzr, x3, x4, ge
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csinc x5, xzr, x6, hs
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csinc x7, x8, xzr, lo
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csinv w1, w0, w19, ne
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csinv wzr, w5, w9, eq
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csinv w9, wzr, w30, gt
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csinv w1, w28, wzr, mi
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csinv x19, x23, x29, lt
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csinv xzr, x3, x4, ge
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csinv x5, xzr, x6, hs
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csinv x7, x8, xzr, lo
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csneg w1, w0, w19, ne
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csneg wzr, w5, w9, eq
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csneg w9, wzr, w30, gt
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csneg w1, w28, wzr, mi
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csneg x19, x23, x29, lt
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csneg xzr, x3, x4, ge
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csneg x5, xzr, x6, hs
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csneg x7, x8, xzr, lo
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cset w3, eq
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cset x9, pl
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csetm w20, ne
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csetm x30, ge
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csinc w2, wzr, wzr, al
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csinv x3, xzr, xzr, nv
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cinc w3, w5, gt
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cinc wzr, w4, le
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cset w9, lt
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cinc x3, x5, gt
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cinc xzr, x4, le
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cset x9, lt
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csinc w5, w6, w6, nv
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csinc x1, x2, x2, al
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cinv w3, w5, gt
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cinv wzr, w4, le
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csetm w9, lt
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cinv x3, x5, gt
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cinv xzr, x4, le
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csetm x9, lt
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csinv x1, x0, x0, al
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csinv w9, w8, w8, nv
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cneg w3, w5, gt
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cneg wzr, w4, le
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cneg w9, wzr, lt
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cneg x3, x5, gt
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cneg xzr, x4, le
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cneg x9, xzr, lt
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csneg x4, x8, x8, al
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - csinv w9, w8, w8, nv
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - rbit w0, w7
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - rbit x18, x3
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - rev16 w17, w1
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - rev16 x5, x2
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - rev w18, w0
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - rev32 x20, x1
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - rev x22, x2
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - clz w24, w3
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - clz x26, x4
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cls w3, w5
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - cls x20, x5
+# CHECK-NEXT: - - - - - - - - - - - - 12.00 - - - - - - - - - - - - udiv w0, w7, w10
+# CHECK-NEXT: - - - - - - - - - - - - 20.00 - - - - - - - - - - - - udiv x9, x22, x4
+# CHECK-NEXT: - - - - - - - - - - - - 12.00 - - - - - - - - - - - - sdiv w12, w21, w0
+# CHECK-NEXT: - - - - - - - - - - - - 20.00 - - - - - - - - - - - - sdiv x13, x2, x1
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - lsl w11, w12, w13
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - lsl x14, x15, x16
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - lsr w17, w18, w19
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - lsr x20, x21, x22
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - asr w23, w24, w25
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - asr x26, x27, x28
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ror w0, w1, w2
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ror x3, x4, x5
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - lsl w6, w7, w8
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - lsl x9, x10, x11
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - lsr w12, w13, w14
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - lsr x15, x16, x17
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - asr w18, w19, w20
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - asr x21, x22, x23
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ror w24, w25, w26
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ror x27, x28, x29
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - smulh x30, x29, x28
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - smulh xzr, x27, x26
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - umulh x30, x29, x28
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - umulh x23, x30, xzr
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - madd w1, w3, w7, w4
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - madd wzr, w0, w9, w11
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - madd w13, wzr, w4, w4
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - madd w19, w30, wzr, w29
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - mul w4, w5, w6
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - madd x1, x3, x7, x4
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - madd xzr, x0, x9, x11
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - madd x13, xzr, x4, x4
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - madd x19, x30, xzr, x29
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - mul x4, x5, x6
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - msub w1, w3, w7, w4
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - msub wzr, w0, w9, w11
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - msub w13, wzr, w4, w4
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - msub w19, w30, wzr, w29
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - mneg w4, w5, w6
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - msub x1, x3, x7, x4
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - msub xzr, x0, x9, x11
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - msub x13, xzr, x4, x4
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - msub x19, x30, xzr, x29
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - mneg x4, x5, x6
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - smaddl x3, w5, w2, x9
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - smaddl xzr, w10, w11, x12
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - smaddl x13, wzr, w14, x15
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - smaddl x16, w17, wzr, x18
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - smull x19, w20, w21
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - smsubl x3, w5, w2, x9
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - smsubl xzr, w10, w11, x12
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - smsubl x13, wzr, w14, x15
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - smsubl x16, w17, wzr, x18
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - smnegl x19, w20, w21
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - umaddl x3, w5, w2, x9
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - umaddl xzr, w10, w11, x12
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - umaddl x13, wzr, w14, x15
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - umaddl x16, w17, wzr, x18
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - umull x19, w20, w21
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - umsubl x3, w5, w2, x9
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - umsubl x16, w17, wzr, x18
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - umnegl x19, w20, w21
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - smulh x30, x29, x28
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - smulh x23, x22, xzr
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - umulh x23, x22, xzr
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - mul x19, x20, xzr
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - mneg w21, w22, w23
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - smull x11, w13, w17
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - umull x11, w13, w17
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - smnegl x11, w13, w17
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - umnegl x11, w13, w17
+# CHECK-NEXT: - - - - - - - - - - - - 0.63 0.63 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - extr w3, w5, w7, #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.63 0.63 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - extr w11, w13, w17, #31
+# CHECK-NEXT: - - - - - - - - - - - - 0.63 0.63 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - extr x3, x5, x7, #15
+# CHECK-NEXT: - - - - - - - - - - - - 0.63 0.63 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - extr x11, x13, x17, #63
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ror x19, x23, #24
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ror x29, xzr, #63
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ror w9, w13, #31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmp s3, s5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmp s31, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmp s31, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmpe s29, s30
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmpe s15, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmpe s15, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmp d4, d12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmp d23, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmp d23, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmpe d26, d22
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmpe d29, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmpe d29, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fccmp s1, s31, #0, eq
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fccmp s3, s0, #15, hs
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fccmp s31, s15, #13, hs
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fccmp d9, d31, #0, le
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fccmp d3, d0, #15, gt
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fccmp d31, d5, #7, ne
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fccmpe s1, s31, #0, eq
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fccmpe s3, s0, #15, hs
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fccmpe s31, s15, #13, hs
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fccmpe d9, d31, #0, le
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fccmpe d3, d0, #15, gt
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fccmpe d31, d5, #7, ne
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcsel s3, s20, s9, pl
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcsel d9, d10, d11, mi
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmov s0, s1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fabs s2, s3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fneg s4, s5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - - fsqrt s6, s7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvt d8, s9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvt h10, s11
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frintn s12, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frintp s14, s15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frintm s16, s17
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frintz s18, s19
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frinta s20, s21
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frintx s22, s23
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frinti s24, s25
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmov d0, d1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fabs d2, d3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fneg d4, d5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - - fsqrt d6, d7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvt s8, d9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvt h10, d11
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frintn d12, d13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frintp d14, d15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frintm d16, d17
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frintz d18, d19
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frinta d20, d21
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frintx d22, d23
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frinti d24, d25
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvt s26, h27
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvt d28, h29
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmul s20, s19, s17
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - - fdiv s1, s2, s3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fadd s4, s5, s6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fsub s7, s8, s9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmax s10, s11, s12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmin s13, s14, s15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmaxnm s16, s17, s18
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fminnm s19, s20, s21
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fnmul s22, s23, s2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmul d20, d19, d17
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - - fdiv d1, d2, d3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fadd d4, d5, d6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fsub d7, d8, d9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmax d10, d11, d12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmin d13, d14, d15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmaxnm d16, d17, d18
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fminnm d19, d20, d21
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fnmul d22, d23, d24
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmadd s3, s5, s6, s31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmadd d3, d13, d0, d23
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmsub s3, s5, s6, s31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmsub d3, d13, d0, d23
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fnmadd s3, s5, s6, s31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fnmadd d3, d13, d0, d23
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fnmsub s3, s5, s6, s31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fnmsub d3, d13, d0, d23
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzs w3, h5, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzs wzr, h20, #13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzs w19, h0, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzs x3, h5, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzs x12, h30, #45
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzs x19, h0, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzs w3, s5, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzs wzr, s20, #13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzs w19, s0, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzs x3, s5, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzs x12, s30, #45
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzs x19, s0, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzs w3, d5, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzs wzr, d20, #13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzs w19, d0, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzs x3, d5, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzs x12, d30, #45
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzs x19, d0, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzu w3, h5, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzu wzr, h20, #13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzu w19, h0, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzu x3, h5, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzu x12, h30, #45
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzu x19, h0, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzu w3, s5, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzu wzr, s20, #13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzu w19, s0, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzu x3, s5, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzu x12, s30, #45
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzu x19, s0, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzu w3, d5, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzu wzr, d20, #13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzu w19, d0, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzu x3, d5, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzu x12, d30, #45
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzu x19, d0, #64
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - scvtf h23, w19, #1
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - scvtf h31, wzr, #20
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - scvtf h14, w0, #32
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - scvtf h23, x19, #1
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - scvtf h31, xzr, #20
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - scvtf h14, x0, #64
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - scvtf s23, w19, #1
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - scvtf s31, wzr, #20
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - scvtf s14, w0, #32
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - scvtf s23, x19, #1
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - scvtf s31, xzr, #20
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - scvtf s14, x0, #64
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - scvtf d23, w19, #1
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - scvtf d31, wzr, #20
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - scvtf d14, w0, #32
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - scvtf d23, x19, #1
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - scvtf d31, xzr, #20
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - scvtf d14, x0, #64
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - ucvtf h23, w19, #1
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - ucvtf h31, wzr, #20
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - ucvtf h14, w0, #32
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - ucvtf h23, x19, #1
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - ucvtf h31, xzr, #20
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - ucvtf h14, x0, #64
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - ucvtf s23, w19, #1
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - ucvtf s31, wzr, #20
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - ucvtf s14, w0, #32
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - ucvtf s23, x19, #1
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - ucvtf s31, xzr, #20
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - ucvtf s14, x0, #64
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - ucvtf d23, w19, #1
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - ucvtf d31, wzr, #20
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - ucvtf d14, w0, #32
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - ucvtf d23, x19, #1
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - ucvtf d31, xzr, #20
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - ucvtf d14, x0, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtns w3, h31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtns xzr, h12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtnu wzr, h12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtnu x0, h0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtps wzr, h9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtps x12, h20
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtpu w30, h23
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtpu x29, h3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtms w2, h3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtms x4, h5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtmu w6, h7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtmu x8, h9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzs w10, h11
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzs x12, h13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzu w14, h15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzu x15, h16
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - scvtf h17, w18
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - scvtf h19, x20
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - ucvtf h21, w22
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - scvtf h23, x24
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtas w25, h26
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtas x27, h28
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtau w29, h30
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtau xzr, h0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtns w3, s31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtns xzr, s12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtnu wzr, s12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtnu x0, s0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtps wzr, s9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtps x12, s20
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtpu w30, s23
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtpu x29, s3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtms w2, s3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtms x4, s5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtmu w6, s7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtmu x8, s9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzs w10, s11
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzs x12, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzu w14, s15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzu x15, s16
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - scvtf s17, w18
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - scvtf s19, x20
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - ucvtf s21, w22
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - scvtf s23, x24
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtas w25, s26
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtas x27, s28
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtau w29, s30
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtau xzr, s0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtns w3, d31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtns xzr, d12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtnu wzr, d12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtnu x0, d0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtps wzr, d9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtps x12, d20
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtpu w30, d23
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtpu x29, d3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtms w2, d3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtms x4, d5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtmu w6, d7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtmu x8, d9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzs w10, d11
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzs x12, d13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzu w14, d15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtzu x15, d16
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - scvtf d17, w18
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - scvtf d19, x20
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - ucvtf d21, w22
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - ucvtf d23, x24
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtas w25, d26
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtas x27, d28
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtau w29, d30
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fcvtau xzr, d0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fmov w3, s9
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - fmov s9, w3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fmov x20, d31
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - fmov d1, x15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - fmov x3, v12.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.25 0.25 0.25 0.25 fmov v1.d[1], x19
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmov s2, #0.12500000
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmov s3, #1.00000000
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmov d30, #16.00000000
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmov s4, #1.06250000
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmov d10, #1.93750000
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmov s12, #-1.00000000
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmov d16, #8.50000000
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldr w3, #0
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldr x29, #4
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrsw xzr, #-4
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldr s0, #8
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldr d0, #1048572
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldr q0, #-1048576
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - prfm pldl1strm, #0
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - prfm #22, #0
+# CHECK-NEXT: - - - 0.50 0.50 - - - - 0.33 0.33 0.83 - - - - - - - - 0.50 - - - - stxrb w18, w8, [sp]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - 0.33 0.33 0.83 - - - - - - - - 0.50 - - - - stxrh w24, w15, [x16]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - 0.33 0.33 0.83 - - - - - - - - 0.50 - - - - stxr w5, w6, [x17]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - 0.33 0.33 0.83 - - - - - - - - 0.50 - - - - stxr w1, x10, [x21]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldxrb w30, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldxrh w17, [x4]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldxr w22, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldxr x11, [x29]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldxr x11, [x29]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldxr x11, [x29]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - 0.33 0.33 0.83 - - - - - - - - 0.50 - - - - stxp w12, w11, w10, [sp]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - 0.33 0.33 0.83 - - - - - - - - 0.50 - - - - stxp wzr, x27, x9, [x12]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldxp w0, wzr, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldxp x17, x0, [x18]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldxp x17, x0, [x18]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - 0.33 0.33 0.83 - - - - - - - - 0.50 - - - - stlxrb w12, w22, [x0]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - 0.33 0.33 0.83 - - - - - - - - 0.50 - - - - stlxrh w10, w1, [x1]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - 0.33 0.33 0.83 - - - - - - - - 0.50 - - - - stlxr w9, w2, [x2]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - 0.33 0.33 0.83 - - - - - - - - 0.50 - - - - stlxr w9, x3, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldaxrb w8, [x4]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldaxrh w7, [x5]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldaxr w6, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldaxr x5, [x6]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldaxr x5, [x6]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldaxr x5, [x6]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - 0.33 0.33 0.83 - - - - - - - - 0.50 - - - - stlxp w4, w5, w6, [sp]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - 0.33 0.33 0.83 - - - - - - - - 0.50 - - - - stlxp wzr, x6, x7, [x1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldaxp w5, w18, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldaxp x6, x19, [x22]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldaxp x6, x19, [x22]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - - - stlrb w24, [sp]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - - - stlrh w25, [x30]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - - - stlr w26, [x29]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - - - stlr x27, [x28]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - - - stlr x27, [x28]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - - - stlr x27, [x28]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldarb w23, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldarh w22, [x30]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldar wzr, [x29]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldar x21, [x28]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldar x21, [x28]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldar x21, [x28]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - - - sturb w9, [sp]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - - - sturh wzr, [x12, #255]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - - - stur w16, [x0, #-256]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - - - stur x28, [x14, #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldurb w1, [x20, #255]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldurh w20, [x1, #255]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldur w12, [sp, #255]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldur xzr, [x12, #255]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldursb x9, [x7, #-256]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldursh x17, [x19, #-256]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldursw x20, [x15, #-256]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfum pldl2keep, [sp, #-256]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldursb w19, [x1, #-256]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldursh w15, [x21, #-256]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - stur b0, [sp, #1]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - stur h12, [x12, #-1]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - stur s15, [x0, #255]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - stur d31, [x5, #25]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - stur q9, [x5]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldur b3, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldur h5, [x4, #-256]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldur s7, [x12, #-1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldur d11, [x19, #4]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldur q13, [x1, #2]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - - - strb w9, [x2], #255
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - - - strb w10, [x3], #1
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - - - strb w10, [x3], #-256
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - - - strh w9, [x2], #255
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - - - strh w9, [x2], #1
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - - - strh w10, [x3], #-256
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - - - str w19, [sp], #255
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - - - str w20, [x30], #1
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - - - str w21, [x12], #-256
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - - - str xzr, [x9], #255
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - - - str x2, [x3], #1
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - - - str x19, [x12], #-256
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrb w9, [x2], #255
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrb w10, [x3], #1
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrb w10, [x3], #-256
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrh w9, [x2], #255
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrh w9, [x2], #1
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrh w10, [x3], #-256
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldr w19, [sp], #255
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldr w20, [x30], #1
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldr w21, [x12], #-256
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldr xzr, [x9], #255
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldr x2, [x3], #1
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldr x19, [x12], #-256
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrsb xzr, [x9], #255
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrsb x2, [x3], #1
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrsb x19, [x12], #-256
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrsh xzr, [x9], #255
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrsh x2, [x3], #1
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrsh x19, [x12], #-256
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrsw xzr, [x9], #255
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrsw x2, [x3], #1
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrsw x19, [x12], #-256
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrsb wzr, [x9], #255
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrsb w2, [x3], #1
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrsb w19, [x12], #-256
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrsh wzr, [x9], #255
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrsh w2, [x3], #1
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrsh w19, [x12], #-256
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 - - str b0, [x0], #255
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 - - str b3, [x3], #1
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 - - str b5, [sp], #-256
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 - - str h10, [x10], #255
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 - - str h13, [x23], #1
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 - - str h15, [sp], #-256
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 - - str s20, [x20], #255
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 - - str s23, [x23], #1
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 - - str s25, [x0], #-256
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 - - str d20, [x20], #255
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 - - str d23, [x23], #1
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 - - str d25, [x0], #-256
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - - - ldr b0, [x0], #255
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - - - ldr b3, [x3], #1
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - - - ldr b5, [sp], #-256
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - - - ldr h10, [x10], #255
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - - - ldr h13, [x23], #1
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - - - ldr h15, [sp], #-256
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - - - ldr s20, [x20], #255
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - - - ldr s23, [x23], #1
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - - - ldr s25, [x0], #-256
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - - - ldr d20, [x20], #255
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - - - ldr d23, [x23], #1
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - - - ldr d25, [x0], #-256
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - - - ldr q20, [x1], #255
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - - - ldr q23, [x9], #1
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - - - ldr q25, [x20], #-256
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 - - str q10, [x1], #255
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 - - str q22, [sp], #1
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 - - str q21, [x20], #-256
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldr x3, [x4, #0]!
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - - - strb w9, [x2, #255]!
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - - - strb w10, [x3, #1]!
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - - - strb w10, [x3, #-256]!
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - - - strh w9, [x2, #255]!
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - - - strh w9, [x2, #1]!
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - - - strh w10, [x3, #-256]!
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - - - str w19, [sp, #255]!
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - - - str w20, [x30, #1]!
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - - - str w21, [x12, #-256]!
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - - - str xzr, [x9, #255]!
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - - - str x2, [x3, #1]!
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - - - str x19, [x12, #-256]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrb w9, [x2, #255]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrb w10, [x3, #1]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrb w10, [x3, #-256]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrh w9, [x2, #255]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrh w9, [x2, #1]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrh w10, [x3, #-256]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldr w19, [sp, #255]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldr w20, [x30, #1]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldr w21, [x12, #-256]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldr xzr, [x9, #255]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldr x2, [x3, #1]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldr x19, [x12, #-256]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrsb xzr, [x9, #255]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrsb x2, [x3, #1]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrsb x19, [x12, #-256]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrsh xzr, [x9, #255]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrsh x2, [x3, #1]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrsh x19, [x12, #-256]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrsw xzr, [x9, #255]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrsw x2, [x3, #1]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrsw x19, [x12, #-256]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrsb wzr, [x9, #255]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrsb w2, [x3, #1]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrsb w19, [x12, #-256]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrsh wzr, [x9, #255]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrsh w2, [x3, #1]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldrsh w19, [x12, #-256]!
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 - - str b0, [x0, #255]!
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 - - str b3, [x3, #1]!
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 - - str b5, [sp, #-256]!
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 - - str h10, [x10, #255]!
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 - - str h13, [x23, #1]!
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 - - str h15, [sp, #-256]!
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 - - str s20, [x20, #255]!
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 - - str s23, [x23, #1]!
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 - - str s25, [x0, #-256]!
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 - - str d20, [x20, #255]!
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 - - str d23, [x23, #1]!
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 - - str d25, [x0, #-256]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - - - ldr b0, [x0, #255]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - - - ldr b3, [x3, #1]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - - - ldr b5, [sp, #-256]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - - - ldr h10, [x10, #255]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - - - ldr h13, [x23, #1]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - - - ldr h15, [sp, #-256]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - - - ldr s20, [x20, #255]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - - - ldr s23, [x23, #1]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - - - ldr s25, [x0, #-256]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - - - ldr d20, [x20, #255]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - - - ldr d23, [x23, #1]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - - - ldr d25, [x0, #-256]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - - - ldr q20, [x1, #255]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - - - ldr q23, [x9, #1]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - - - ldr q25, [x20, #-256]!
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 - - str q10, [x1, #255]!
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 - - str q22, [sp, #1]!
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 - - str q21, [x20, #-256]!
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - - - sttrb w9, [sp]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - - - sttrh wzr, [x12, #255]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - - - sttr w16, [x0, #-256]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - - - sttr x28, [x14, #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldtrb w1, [x20, #255]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldtrh w20, [x1, #255]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldtr w12, [sp, #255]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldtr xzr, [x12, #255]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldtrsb x9, [x7, #-256]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldtrsh x17, [x19, #-256]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldtrsw x20, [x15, #-256]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldtrsb w19, [x1, #-256]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldtrsh w15, [x21, #-256]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldr x4, [x29]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldr x30, [x12, #32760]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldr x20, [sp, #8]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldr xzr, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldr w2, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldr w17, [sp, #16380]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldr w13, [x2, #4]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldrsw x2, [x5, #4]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldrsw x23, [sp, #16380]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldrh w2, [x4]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldrsh w23, [x6, #8190]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldrsh wzr, [sp, #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldrsh x29, [x2, #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldrb w26, [x3, #121]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldrb w12, [x2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldrsb w27, [sp, #4095]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldrsb xzr, [x15]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - - - str x30, [sp]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - - - str w20, [x4, #16380]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - - - strh w17, [sp, #8190]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - - - strb w23, [x3, #4095]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - - - strb wzr, [x2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldr b31, [sp, #4095]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldr h20, [x2, #8190]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldr s10, [x19, #16380]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldr d3, [x10, #32760]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - str q12, [sp, #65520]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldr h3, [sp, x5]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldr h9, [x27, x6]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldr h10, [x30, x7, lsl #1]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - str h11, [x29, x3, sxtx]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - str h12, [x28, xzr, sxtx]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 0.50 0.50 - - str h13, [x27, x5, sxtx #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldr h14, [x26, w6, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldr h15, [x25, w7, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldr h16, [x24, w8, uxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldr h17, [x23, w9, sxtw]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - str h18, [x22, w10, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldr h19, [x21, wzr, sxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldrb w3, [sp, x5]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldrb w9, [x27, x6]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldrsb w10, [x30, x7]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldrb w11, [x29, x3, sxtx]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - - - strb w12, [x28, xzr, sxtx]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldrb w14, [x26, w6, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldrsb w15, [x25, w7, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldrb w17, [x23, w9, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldrsb x18, [x22, w10, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldrsh w3, [sp, x5]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldrsh w9, [x27, x6]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldrh w10, [x30, x7, lsl #1]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - - - strh w11, [x29, x3, sxtx]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldrh w12, [x28, xzr, sxtx]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldrsh x13, [x27, x5, sxtx #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldrh w14, [x26, w6, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldrh w15, [x25, w7, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldrsh w16, [x24, w8, uxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldrh w17, [x23, w9, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldrh w18, [x22, w10, sxtw]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - - - strh w19, [x21, wzr, sxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldr w3, [sp, x5]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldr s9, [x27, x6]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldr w10, [x30, x7, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldr w11, [x29, x3, sxtx]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - str s12, [x28, xzr, sxtx]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - - - str w13, [x27, x5, sxtx #2]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - - - str w14, [x26, w6, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldr w15, [x25, w7, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldr w16, [x24, w8, uxtw #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldrsw x17, [x23, w9, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldr w18, [x22, w10, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldrsw x19, [x21, wzr, sxtw #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldr x3, [sp, x5]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - - - str x9, [x27, x6]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldr d10, [x30, x7, lsl #3]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - - - str x11, [x29, x3, sxtx]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldr x12, [x28, xzr, sxtx]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldr x13, [x27, x5, sxtx #3]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfm pldl1keep, [x26, w6, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldr x15, [x25, w7, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldr x16, [x24, w8, uxtw #3]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldr x17, [x23, w9, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldr x18, [x22, w10, sxtw]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - str d19, [x21, wzr, sxtw #3]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldr q3, [sp, x5]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldr q9, [x27, x6]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldr q10, [x30, x7, lsl #4]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - str q11, [x29, x3, sxtx]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - str q12, [x28, xzr, sxtx]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 0.50 0.50 - - str q13, [x27, x5, sxtx #4]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldr q14, [x26, w6, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldr q15, [x25, w7, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldr q16, [x24, w8, uxtw #4]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldr q17, [x23, w9, sxtw]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - str q18, [x22, w10, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldr q19, [x21, wzr, sxtw #4]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldp w3, w5, [sp]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - - - stp wzr, w9, [sp, #252]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldp w2, wzr, [sp, #-256]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldp w9, w10, [sp, #4]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldpsw x9, x10, [sp, #4]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldpsw x9, x10, [x2, #-256]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldpsw x20, x30, [sp, #252]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldp x21, x29, [x2, #504]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldp x22, x23, [x3, #-512]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldp x24, x25, [x4, #8]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldp s29, s28, [sp, #252]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - stp s27, s26, [sp, #-256]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldp s1, s2, [x3, #44]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - stp d3, d5, [x9, #504]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - stp d7, d11, [x10, #-512]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldp d2, d3, [x30, #-8]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 1.00 1.00 - - stp q3, q5, [sp]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 1.00 1.00 - - stp q17, q19, [sp, #1008]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - - - - - ldp q23, q29, [x1, #-1024]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldp w3, w5, [sp], #0
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - - - stp wzr, w9, [sp], #252
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldp w2, wzr, [sp], #-256
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldp w9, w10, [sp], #4
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - - - ldpsw x9, x10, [sp], #4
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - - - ldpsw x9, x10, [x2], #-256
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - - - ldpsw x20, x30, [sp], #252
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldp x21, x29, [x2], #504
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldp x22, x23, [x3], #-512
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldp x24, x25, [x4], #8
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - - - ldp s29, s28, [sp], #252
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 - - stp s27, s26, [sp], #-256
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - - - ldp s1, s2, [x3], #44
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 - - stp d3, d5, [x9], #504
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 - - stp d7, d11, [x10], #-512
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - - - ldp d2, d3, [x30], #-8
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 1.00 1.00 - - stp q3, q5, [sp], #0
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 1.00 1.00 - - stp q17, q19, [sp], #1008
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 0.38 0.38 0.38 0.38 0.38 0.38 0.38 0.38 - - - - - ldp q23, q29, [x1], #-1024
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldp w3, w5, [sp, #0]!
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - - - stp wzr, w9, [sp, #252]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldp w2, wzr, [sp, #-256]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldp w9, w10, [sp, #4]!
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - - - ldpsw x9, x10, [sp, #4]!
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - - - ldpsw x9, x10, [x2, #-256]!
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - - - ldpsw x20, x30, [sp, #252]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldp x21, x29, [x2, #504]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldp x22, x23, [x3, #-512]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldp x24, x25, [x4, #8]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - - - ldp s29, s28, [sp, #252]!
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 - - stp s27, s26, [sp, #-256]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - - - ldp s1, s2, [x3, #44]!
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 - - stp d3, d5, [x9, #504]!
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 - - stp d7, d11, [x10, #-512]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - - - ldp d2, d3, [x30, #-8]!
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 1.00 1.00 - - stp q3, q5, [sp, #0]!
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 1.00 1.00 - - stp q17, q19, [sp, #1008]!
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 0.38 0.38 0.38 0.38 0.38 0.38 0.38 0.38 - - - - - ldp q23, q29, [x1, #-1024]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnp w3, w5, [sp]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - - - stnp wzr, w9, [sp, #252]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnp w2, wzr, [sp, #-256]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnp w9, w10, [sp, #4]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnp x21, x29, [x2, #504]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnp x22, x23, [x3, #-512]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnp x24, x25, [x4, #8]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnp s29, s28, [sp, #252]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - stnp s27, s26, [sp, #-256]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnp s1, s2, [x3, #44]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - stnp d3, d5, [x9, #504]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - stnp d7, d11, [x10, #-512]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnp d2, d3, [x30, #-8]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 1.00 1.00 - - stnp q3, q5, [sp]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 1.00 1.00 - - stnp q17, q19, [sp, #1008]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - - - - - ldnp q23, q29, [x1, #-1024]
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - mov w3, #983055
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - mov x10, #-6148914691236517206
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ands w4, w4, #0xf000f
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ands x11, x11, #0xaaaaaaaaaaaaaaaa
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - and w12, w23, w21
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - and w16, w15, w1, lsl #1
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - and w9, w4, w10, lsl #31
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - and w3, w30, w11
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - and x3, x5, x7, lsl #63
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - and x5, x14, x19, asr #4
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - and w3, w17, w19, ror #31
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - and w0, w2, wzr, lsr #17
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - and w3, w30, w11, asr #2
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - and xzr, x4, x26
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - and w3, wzr, w20, ror #2
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - and x7, x20, xzr, asr #63
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - bic x13, x20, x14, lsl #47
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - bic w2, w7, w9
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - orr w2, w7, w0, asr #31
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - orr x8, x9, x10, lsl #12
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - orn x3, x5, x7, asr #2
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - orn w2, w5, w29
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - ands w7, wzr, w9, lsl #1
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - ands x3, x5, x20, ror #63
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - bics w3, w5, w7
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - bics x3, xzr, x3, lsl #1
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - tst w3, w7, lsl #31
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - - - tst x2, x20, asr #2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - - - mov x3, x6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - - - mov x3, xzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - - - mov wzr, w2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - - - mov w3, w5
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - movz w2, #0, lsl #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - mov w2, #-1235
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - mov x2, #5299989643264
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - - - mov x2, #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - movk w3, #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - movz x4, #0, lsl #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - movk w5, #0, lsl #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - movz x6, #0, lsl #32
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - movk x7, #0, lsl #32
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - movz x8, #0, lsl #48
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - movk x9, #0, lsl #48
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - adr x2, #1600
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - adrp x21, #6553600
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - adr x0, #262144
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - - - tbz x12, #62, #0
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - - - tbz x12, #62, #4
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - - - tbz x12, #62, #-32768
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - - - tbnz x12, #60, #32764
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - - - b #4
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - - - b #-4
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - - - b #134217724
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - - - br x20
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - 0.17 0.17 0.17 0.17 0.17 0.17 - - - - - blr xzr
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - - - ret x10
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - - - ret
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - - - eret
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - - - drps
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-clear-upper-regs.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-clear-upper-regs.s
new file mode 100644
index 0000000..5e5f7a0
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-clear-upper-regs.s
@@ -0,0 +1,892 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=neoverse-v3 --timeline --timeline-max-iterations=4 < %s | FileCheck %s
+
+# LLVM-MCA-BEGIN FPR8-bit
+ldr b0, [sp]
+add z0.d, z0.d, z0.d
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN FPR16-bit
+ldr h0, [sp]
+add z0.d, z0.d, z0.d
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN FPR32-bit
+ldr s0, [sp]
+add z0.d, z0.d, z0.d
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN FPR64-bit
+ldr d0, [sp]
+add z0.d, z0.d, z0.d
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN FPR128-bit
+ldr q0, [sp]
+add z0.d, z0.d, z0.d
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN SIMD64-bit-b
+ld1 {v0.8b}, [sp]
+add z0.d, z0.d, z0.d
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN SIMD64-bit-h
+ld1 {v0.4h}, [sp]
+add z0.d, z0.d, z0.d
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN SIMD64-bit-s
+ld1 {v0.2s}, [sp]
+add z0.d, z0.d, z0.d
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN SIMD64-bit-d
+ld1 {v0.1d}, [sp]
+add z0.d, z0.d, z0.d
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN insr
+insr z0.s, w0
+add z0.s, z0.s, z0.s
+# LLVM-MCA-END
+
+# CHECK: [0] Code Region - FPR8-bit
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 200
+# CHECK-NEXT: Total Cycles: 44
+# CHECK-NEXT: Total uOps: 200
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.55
+# CHECK-NEXT: IPC: 4.55
+# CHECK-NEXT: Block RThroughput: 0.3
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 6 0.33 * ldr b0, [sp]
+# CHECK-NEXT: 1 2 0.25 add z0.d, z0.d, z0.d
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - V3UnitB
+# CHECK-NEXT: [0.1] - V3UnitB
+# CHECK-NEXT: [0.2] - V3UnitB
+# CHECK-NEXT: [1.0] - V3UnitD
+# CHECK-NEXT: [1.1] - V3UnitD
+# CHECK-NEXT: [2.0] - V3UnitFlg
+# CHECK-NEXT: [2.1] - V3UnitFlg
+# CHECK-NEXT: [2.2] - V3UnitFlg
+# CHECK-NEXT: [2.3] - V3UnitFlg
+# CHECK-NEXT: [3.0] - V3UnitL12
+# CHECK-NEXT: [3.1] - V3UnitL12
+# CHECK-NEXT: [4] - V3UnitLS0
+# CHECK-NEXT: [5] - V3UnitM0
+# CHECK-NEXT: [6] - V3UnitM1
+# CHECK-NEXT: [7] - V3UnitS0
+# CHECK-NEXT: [8] - V3UnitS1
+# CHECK-NEXT: [9] - V3UnitS2
+# CHECK-NEXT: [10] - V3UnitS3
+# CHECK-NEXT: [11] - V3UnitS4
+# CHECK-NEXT: [12] - V3UnitS5
+# CHECK-NEXT: [13] - V3UnitST1
+# CHECK-NEXT: [14] - V3UnitV0
+# CHECK-NEXT: [15] - V3UnitV1
+# CHECK-NEXT: [16] - V3UnitV2
+# CHECK-NEXT: [17] - V3UnitV3
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] [17]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.34 - - - - - - - - - 0.25 0.25 0.25 0.25
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] [17] Instructions:
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.34 - - - - - - - - - - - - - ldr b0, [sp]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z0.d, z0.d, z0.d
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER .. ldr b0, [sp]
+# CHECK-NEXT: [0,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,0] DeeeeeeE--R. ldr b0, [sp]
+# CHECK-NEXT: [1,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [2,0] DeeeeeeE--R. ldr b0, [sp]
+# CHECK-NEXT: [2,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [3,0] D=eeeeeeE-R. ldr b0, [sp]
+# CHECK-NEXT: [3,1] D=======eeER add z0.d, z0.d, z0.d
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 4 1.3 1.3 1.3 ldr b0, [sp]
+# CHECK-NEXT: 1. 4 7.3 0.0 0.0 add z0.d, z0.d, z0.d
+# CHECK-NEXT: 4 4.3 0.6 0.6 <total>
+
+# CHECK: [1] Code Region - FPR16-bit
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 200
+# CHECK-NEXT: Total Cycles: 44
+# CHECK-NEXT: Total uOps: 200
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.55
+# CHECK-NEXT: IPC: 4.55
+# CHECK-NEXT: Block RThroughput: 0.3
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 6 0.33 * ldr h0, [sp]
+# CHECK-NEXT: 1 2 0.25 add z0.d, z0.d, z0.d
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - V3UnitB
+# CHECK-NEXT: [0.1] - V3UnitB
+# CHECK-NEXT: [0.2] - V3UnitB
+# CHECK-NEXT: [1.0] - V3UnitD
+# CHECK-NEXT: [1.1] - V3UnitD
+# CHECK-NEXT: [2.0] - V3UnitFlg
+# CHECK-NEXT: [2.1] - V3UnitFlg
+# CHECK-NEXT: [2.2] - V3UnitFlg
+# CHECK-NEXT: [2.3] - V3UnitFlg
+# CHECK-NEXT: [3.0] - V3UnitL12
+# CHECK-NEXT: [3.1] - V3UnitL12
+# CHECK-NEXT: [4] - V3UnitLS0
+# CHECK-NEXT: [5] - V3UnitM0
+# CHECK-NEXT: [6] - V3UnitM1
+# CHECK-NEXT: [7] - V3UnitS0
+# CHECK-NEXT: [8] - V3UnitS1
+# CHECK-NEXT: [9] - V3UnitS2
+# CHECK-NEXT: [10] - V3UnitS3
+# CHECK-NEXT: [11] - V3UnitS4
+# CHECK-NEXT: [12] - V3UnitS5
+# CHECK-NEXT: [13] - V3UnitST1
+# CHECK-NEXT: [14] - V3UnitV0
+# CHECK-NEXT: [15] - V3UnitV1
+# CHECK-NEXT: [16] - V3UnitV2
+# CHECK-NEXT: [17] - V3UnitV3
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] [17]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.34 - - - - - - - - - 0.25 0.25 0.25 0.25
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] [17] Instructions:
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.34 - - - - - - - - - - - - - ldr h0, [sp]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z0.d, z0.d, z0.d
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER .. ldr h0, [sp]
+# CHECK-NEXT: [0,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,0] DeeeeeeE--R. ldr h0, [sp]
+# CHECK-NEXT: [1,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [2,0] DeeeeeeE--R. ldr h0, [sp]
+# CHECK-NEXT: [2,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [3,0] D=eeeeeeE-R. ldr h0, [sp]
+# CHECK-NEXT: [3,1] D=======eeER add z0.d, z0.d, z0.d
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 4 1.3 1.3 1.3 ldr h0, [sp]
+# CHECK-NEXT: 1. 4 7.3 0.0 0.0 add z0.d, z0.d, z0.d
+# CHECK-NEXT: 4 4.3 0.6 0.6 <total>
+
+# CHECK: [2] Code Region - FPR32-bit
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 200
+# CHECK-NEXT: Total Cycles: 44
+# CHECK-NEXT: Total uOps: 200
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.55
+# CHECK-NEXT: IPC: 4.55
+# CHECK-NEXT: Block RThroughput: 0.3
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 6 0.33 * ldr s0, [sp]
+# CHECK-NEXT: 1 2 0.25 add z0.d, z0.d, z0.d
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - V3UnitB
+# CHECK-NEXT: [0.1] - V3UnitB
+# CHECK-NEXT: [0.2] - V3UnitB
+# CHECK-NEXT: [1.0] - V3UnitD
+# CHECK-NEXT: [1.1] - V3UnitD
+# CHECK-NEXT: [2.0] - V3UnitFlg
+# CHECK-NEXT: [2.1] - V3UnitFlg
+# CHECK-NEXT: [2.2] - V3UnitFlg
+# CHECK-NEXT: [2.3] - V3UnitFlg
+# CHECK-NEXT: [3.0] - V3UnitL12
+# CHECK-NEXT: [3.1] - V3UnitL12
+# CHECK-NEXT: [4] - V3UnitLS0
+# CHECK-NEXT: [5] - V3UnitM0
+# CHECK-NEXT: [6] - V3UnitM1
+# CHECK-NEXT: [7] - V3UnitS0
+# CHECK-NEXT: [8] - V3UnitS1
+# CHECK-NEXT: [9] - V3UnitS2
+# CHECK-NEXT: [10] - V3UnitS3
+# CHECK-NEXT: [11] - V3UnitS4
+# CHECK-NEXT: [12] - V3UnitS5
+# CHECK-NEXT: [13] - V3UnitST1
+# CHECK-NEXT: [14] - V3UnitV0
+# CHECK-NEXT: [15] - V3UnitV1
+# CHECK-NEXT: [16] - V3UnitV2
+# CHECK-NEXT: [17] - V3UnitV3
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] [17]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.34 - - - - - - - - - 0.25 0.25 0.25 0.25
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] [17] Instructions:
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.34 - - - - - - - - - - - - - ldr s0, [sp]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z0.d, z0.d, z0.d
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER .. ldr s0, [sp]
+# CHECK-NEXT: [0,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,0] DeeeeeeE--R. ldr s0, [sp]
+# CHECK-NEXT: [1,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [2,0] DeeeeeeE--R. ldr s0, [sp]
+# CHECK-NEXT: [2,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [3,0] D=eeeeeeE-R. ldr s0, [sp]
+# CHECK-NEXT: [3,1] D=======eeER add z0.d, z0.d, z0.d
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 4 1.3 1.3 1.3 ldr s0, [sp]
+# CHECK-NEXT: 1. 4 7.3 0.0 0.0 add z0.d, z0.d, z0.d
+# CHECK-NEXT: 4 4.3 0.6 0.6 <total>
+
+# CHECK: [3] Code Region - FPR64-bit
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 200
+# CHECK-NEXT: Total Cycles: 44
+# CHECK-NEXT: Total uOps: 200
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.55
+# CHECK-NEXT: IPC: 4.55
+# CHECK-NEXT: Block RThroughput: 0.3
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 6 0.33 * ldr d0, [sp]
+# CHECK-NEXT: 1 2 0.25 add z0.d, z0.d, z0.d
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - V3UnitB
+# CHECK-NEXT: [0.1] - V3UnitB
+# CHECK-NEXT: [0.2] - V3UnitB
+# CHECK-NEXT: [1.0] - V3UnitD
+# CHECK-NEXT: [1.1] - V3UnitD
+# CHECK-NEXT: [2.0] - V3UnitFlg
+# CHECK-NEXT: [2.1] - V3UnitFlg
+# CHECK-NEXT: [2.2] - V3UnitFlg
+# CHECK-NEXT: [2.3] - V3UnitFlg
+# CHECK-NEXT: [3.0] - V3UnitL12
+# CHECK-NEXT: [3.1] - V3UnitL12
+# CHECK-NEXT: [4] - V3UnitLS0
+# CHECK-NEXT: [5] - V3UnitM0
+# CHECK-NEXT: [6] - V3UnitM1
+# CHECK-NEXT: [7] - V3UnitS0
+# CHECK-NEXT: [8] - V3UnitS1
+# CHECK-NEXT: [9] - V3UnitS2
+# CHECK-NEXT: [10] - V3UnitS3
+# CHECK-NEXT: [11] - V3UnitS4
+# CHECK-NEXT: [12] - V3UnitS5
+# CHECK-NEXT: [13] - V3UnitST1
+# CHECK-NEXT: [14] - V3UnitV0
+# CHECK-NEXT: [15] - V3UnitV1
+# CHECK-NEXT: [16] - V3UnitV2
+# CHECK-NEXT: [17] - V3UnitV3
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] [17]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.34 - - - - - - - - - 0.25 0.25 0.25 0.25
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] [17] Instructions:
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.34 - - - - - - - - - - - - - ldr d0, [sp]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z0.d, z0.d, z0.d
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER .. ldr d0, [sp]
+# CHECK-NEXT: [0,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,0] DeeeeeeE--R. ldr d0, [sp]
+# CHECK-NEXT: [1,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [2,0] DeeeeeeE--R. ldr d0, [sp]
+# CHECK-NEXT: [2,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [3,0] D=eeeeeeE-R. ldr d0, [sp]
+# CHECK-NEXT: [3,1] D=======eeER add z0.d, z0.d, z0.d
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 4 1.3 1.3 1.3 ldr d0, [sp]
+# CHECK-NEXT: 1. 4 7.3 0.0 0.0 add z0.d, z0.d, z0.d
+# CHECK-NEXT: 4 4.3 0.6 0.6 <total>
+
+# CHECK: [4] Code Region - FPR128-bit
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 200
+# CHECK-NEXT: Total Cycles: 44
+# CHECK-NEXT: Total uOps: 200
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.55
+# CHECK-NEXT: IPC: 4.55
+# CHECK-NEXT: Block RThroughput: 0.3
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 6 0.33 * ldr q0, [sp]
+# CHECK-NEXT: 1 2 0.25 add z0.d, z0.d, z0.d
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - V3UnitB
+# CHECK-NEXT: [0.1] - V3UnitB
+# CHECK-NEXT: [0.2] - V3UnitB
+# CHECK-NEXT: [1.0] - V3UnitD
+# CHECK-NEXT: [1.1] - V3UnitD
+# CHECK-NEXT: [2.0] - V3UnitFlg
+# CHECK-NEXT: [2.1] - V3UnitFlg
+# CHECK-NEXT: [2.2] - V3UnitFlg
+# CHECK-NEXT: [2.3] - V3UnitFlg
+# CHECK-NEXT: [3.0] - V3UnitL12
+# CHECK-NEXT: [3.1] - V3UnitL12
+# CHECK-NEXT: [4] - V3UnitLS0
+# CHECK-NEXT: [5] - V3UnitM0
+# CHECK-NEXT: [6] - V3UnitM1
+# CHECK-NEXT: [7] - V3UnitS0
+# CHECK-NEXT: [8] - V3UnitS1
+# CHECK-NEXT: [9] - V3UnitS2
+# CHECK-NEXT: [10] - V3UnitS3
+# CHECK-NEXT: [11] - V3UnitS4
+# CHECK-NEXT: [12] - V3UnitS5
+# CHECK-NEXT: [13] - V3UnitST1
+# CHECK-NEXT: [14] - V3UnitV0
+# CHECK-NEXT: [15] - V3UnitV1
+# CHECK-NEXT: [16] - V3UnitV2
+# CHECK-NEXT: [17] - V3UnitV3
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] [17]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.34 - - - - - - - - - 0.25 0.25 0.25 0.25
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] [17] Instructions:
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.34 - - - - - - - - - - - - - ldr q0, [sp]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z0.d, z0.d, z0.d
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER .. ldr q0, [sp]
+# CHECK-NEXT: [0,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,0] DeeeeeeE--R. ldr q0, [sp]
+# CHECK-NEXT: [1,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [2,0] DeeeeeeE--R. ldr q0, [sp]
+# CHECK-NEXT: [2,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [3,0] D=eeeeeeE-R. ldr q0, [sp]
+# CHECK-NEXT: [3,1] D=======eeER add z0.d, z0.d, z0.d
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 4 1.3 1.3 1.3 ldr q0, [sp]
+# CHECK-NEXT: 1. 4 7.3 0.0 0.0 add z0.d, z0.d, z0.d
+# CHECK-NEXT: 4 4.3 0.6 0.6 <total>
+
+# CHECK: [5] Code Region - SIMD64-bit-b
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 200
+# CHECK-NEXT: Total Cycles: 44
+# CHECK-NEXT: Total uOps: 200
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.55
+# CHECK-NEXT: IPC: 4.55
+# CHECK-NEXT: Block RThroughput: 0.3
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 6 0.33 * ld1 { v0.8b }, [sp]
+# CHECK-NEXT: 1 2 0.25 add z0.d, z0.d, z0.d
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - V3UnitB
+# CHECK-NEXT: [0.1] - V3UnitB
+# CHECK-NEXT: [0.2] - V3UnitB
+# CHECK-NEXT: [1.0] - V3UnitD
+# CHECK-NEXT: [1.1] - V3UnitD
+# CHECK-NEXT: [2.0] - V3UnitFlg
+# CHECK-NEXT: [2.1] - V3UnitFlg
+# CHECK-NEXT: [2.2] - V3UnitFlg
+# CHECK-NEXT: [2.3] - V3UnitFlg
+# CHECK-NEXT: [3.0] - V3UnitL12
+# CHECK-NEXT: [3.1] - V3UnitL12
+# CHECK-NEXT: [4] - V3UnitLS0
+# CHECK-NEXT: [5] - V3UnitM0
+# CHECK-NEXT: [6] - V3UnitM1
+# CHECK-NEXT: [7] - V3UnitS0
+# CHECK-NEXT: [8] - V3UnitS1
+# CHECK-NEXT: [9] - V3UnitS2
+# CHECK-NEXT: [10] - V3UnitS3
+# CHECK-NEXT: [11] - V3UnitS4
+# CHECK-NEXT: [12] - V3UnitS5
+# CHECK-NEXT: [13] - V3UnitST1
+# CHECK-NEXT: [14] - V3UnitV0
+# CHECK-NEXT: [15] - V3UnitV1
+# CHECK-NEXT: [16] - V3UnitV2
+# CHECK-NEXT: [17] - V3UnitV3
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] [17]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.34 - - - - - - - - - 0.25 0.25 0.25 0.25
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] [17] Instructions:
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.34 - - - - - - - - - - - - - ld1 { v0.8b }, [sp]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z0.d, z0.d, z0.d
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER .. ld1 { v0.8b }, [sp]
+# CHECK-NEXT: [0,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,0] DeeeeeeE--R. ld1 { v0.8b }, [sp]
+# CHECK-NEXT: [1,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [2,0] DeeeeeeE--R. ld1 { v0.8b }, [sp]
+# CHECK-NEXT: [2,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [3,0] D=eeeeeeE-R. ld1 { v0.8b }, [sp]
+# CHECK-NEXT: [3,1] D=======eeER add z0.d, z0.d, z0.d
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 4 1.3 1.3 1.3 ld1 { v0.8b }, [sp]
+# CHECK-NEXT: 1. 4 7.3 0.0 0.0 add z0.d, z0.d, z0.d
+# CHECK-NEXT: 4 4.3 0.6 0.6 <total>
+
+# CHECK: [6] Code Region - SIMD64-bit-h
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 200
+# CHECK-NEXT: Total Cycles: 44
+# CHECK-NEXT: Total uOps: 200
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.55
+# CHECK-NEXT: IPC: 4.55
+# CHECK-NEXT: Block RThroughput: 0.3
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 6 0.33 * ld1 { v0.4h }, [sp]
+# CHECK-NEXT: 1 2 0.25 add z0.d, z0.d, z0.d
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - V3UnitB
+# CHECK-NEXT: [0.1] - V3UnitB
+# CHECK-NEXT: [0.2] - V3UnitB
+# CHECK-NEXT: [1.0] - V3UnitD
+# CHECK-NEXT: [1.1] - V3UnitD
+# CHECK-NEXT: [2.0] - V3UnitFlg
+# CHECK-NEXT: [2.1] - V3UnitFlg
+# CHECK-NEXT: [2.2] - V3UnitFlg
+# CHECK-NEXT: [2.3] - V3UnitFlg
+# CHECK-NEXT: [3.0] - V3UnitL12
+# CHECK-NEXT: [3.1] - V3UnitL12
+# CHECK-NEXT: [4] - V3UnitLS0
+# CHECK-NEXT: [5] - V3UnitM0
+# CHECK-NEXT: [6] - V3UnitM1
+# CHECK-NEXT: [7] - V3UnitS0
+# CHECK-NEXT: [8] - V3UnitS1
+# CHECK-NEXT: [9] - V3UnitS2
+# CHECK-NEXT: [10] - V3UnitS3
+# CHECK-NEXT: [11] - V3UnitS4
+# CHECK-NEXT: [12] - V3UnitS5
+# CHECK-NEXT: [13] - V3UnitST1
+# CHECK-NEXT: [14] - V3UnitV0
+# CHECK-NEXT: [15] - V3UnitV1
+# CHECK-NEXT: [16] - V3UnitV2
+# CHECK-NEXT: [17] - V3UnitV3
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] [17]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.34 - - - - - - - - - 0.25 0.25 0.25 0.25
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] [17] Instructions:
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.34 - - - - - - - - - - - - - ld1 { v0.4h }, [sp]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z0.d, z0.d, z0.d
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER .. ld1 { v0.4h }, [sp]
+# CHECK-NEXT: [0,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,0] DeeeeeeE--R. ld1 { v0.4h }, [sp]
+# CHECK-NEXT: [1,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [2,0] DeeeeeeE--R. ld1 { v0.4h }, [sp]
+# CHECK-NEXT: [2,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [3,0] D=eeeeeeE-R. ld1 { v0.4h }, [sp]
+# CHECK-NEXT: [3,1] D=======eeER add z0.d, z0.d, z0.d
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 4 1.3 1.3 1.3 ld1 { v0.4h }, [sp]
+# CHECK-NEXT: 1. 4 7.3 0.0 0.0 add z0.d, z0.d, z0.d
+# CHECK-NEXT: 4 4.3 0.6 0.6 <total>
+
+# CHECK: [7] Code Region - SIMD64-bit-s
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 200
+# CHECK-NEXT: Total Cycles: 44
+# CHECK-NEXT: Total uOps: 200
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.55
+# CHECK-NEXT: IPC: 4.55
+# CHECK-NEXT: Block RThroughput: 0.3
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 6 0.33 * ld1 { v0.2s }, [sp]
+# CHECK-NEXT: 1 2 0.25 add z0.d, z0.d, z0.d
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - V3UnitB
+# CHECK-NEXT: [0.1] - V3UnitB
+# CHECK-NEXT: [0.2] - V3UnitB
+# CHECK-NEXT: [1.0] - V3UnitD
+# CHECK-NEXT: [1.1] - V3UnitD
+# CHECK-NEXT: [2.0] - V3UnitFlg
+# CHECK-NEXT: [2.1] - V3UnitFlg
+# CHECK-NEXT: [2.2] - V3UnitFlg
+# CHECK-NEXT: [2.3] - V3UnitFlg
+# CHECK-NEXT: [3.0] - V3UnitL12
+# CHECK-NEXT: [3.1] - V3UnitL12
+# CHECK-NEXT: [4] - V3UnitLS0
+# CHECK-NEXT: [5] - V3UnitM0
+# CHECK-NEXT: [6] - V3UnitM1
+# CHECK-NEXT: [7] - V3UnitS0
+# CHECK-NEXT: [8] - V3UnitS1
+# CHECK-NEXT: [9] - V3UnitS2
+# CHECK-NEXT: [10] - V3UnitS3
+# CHECK-NEXT: [11] - V3UnitS4
+# CHECK-NEXT: [12] - V3UnitS5
+# CHECK-NEXT: [13] - V3UnitST1
+# CHECK-NEXT: [14] - V3UnitV0
+# CHECK-NEXT: [15] - V3UnitV1
+# CHECK-NEXT: [16] - V3UnitV2
+# CHECK-NEXT: [17] - V3UnitV3
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] [17]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.34 - - - - - - - - - 0.25 0.25 0.25 0.25
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] [17] Instructions:
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.34 - - - - - - - - - - - - - ld1 { v0.2s }, [sp]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z0.d, z0.d, z0.d
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER .. ld1 { v0.2s }, [sp]
+# CHECK-NEXT: [0,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,0] DeeeeeeE--R. ld1 { v0.2s }, [sp]
+# CHECK-NEXT: [1,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [2,0] DeeeeeeE--R. ld1 { v0.2s }, [sp]
+# CHECK-NEXT: [2,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [3,0] D=eeeeeeE-R. ld1 { v0.2s }, [sp]
+# CHECK-NEXT: [3,1] D=======eeER add z0.d, z0.d, z0.d
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 4 1.3 1.3 1.3 ld1 { v0.2s }, [sp]
+# CHECK-NEXT: 1. 4 7.3 0.0 0.0 add z0.d, z0.d, z0.d
+# CHECK-NEXT: 4 4.3 0.6 0.6 <total>
+
+# CHECK: [8] Code Region - SIMD64-bit-d
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 200
+# CHECK-NEXT: Total Cycles: 44
+# CHECK-NEXT: Total uOps: 200
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.55
+# CHECK-NEXT: IPC: 4.55
+# CHECK-NEXT: Block RThroughput: 0.3
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 6 0.33 * ld1 { v0.1d }, [sp]
+# CHECK-NEXT: 1 2 0.25 add z0.d, z0.d, z0.d
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - V3UnitB
+# CHECK-NEXT: [0.1] - V3UnitB
+# CHECK-NEXT: [0.2] - V3UnitB
+# CHECK-NEXT: [1.0] - V3UnitD
+# CHECK-NEXT: [1.1] - V3UnitD
+# CHECK-NEXT: [2.0] - V3UnitFlg
+# CHECK-NEXT: [2.1] - V3UnitFlg
+# CHECK-NEXT: [2.2] - V3UnitFlg
+# CHECK-NEXT: [2.3] - V3UnitFlg
+# CHECK-NEXT: [3.0] - V3UnitL12
+# CHECK-NEXT: [3.1] - V3UnitL12
+# CHECK-NEXT: [4] - V3UnitLS0
+# CHECK-NEXT: [5] - V3UnitM0
+# CHECK-NEXT: [6] - V3UnitM1
+# CHECK-NEXT: [7] - V3UnitS0
+# CHECK-NEXT: [8] - V3UnitS1
+# CHECK-NEXT: [9] - V3UnitS2
+# CHECK-NEXT: [10] - V3UnitS3
+# CHECK-NEXT: [11] - V3UnitS4
+# CHECK-NEXT: [12] - V3UnitS5
+# CHECK-NEXT: [13] - V3UnitST1
+# CHECK-NEXT: [14] - V3UnitV0
+# CHECK-NEXT: [15] - V3UnitV1
+# CHECK-NEXT: [16] - V3UnitV2
+# CHECK-NEXT: [17] - V3UnitV3
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] [17]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.34 - - - - - - - - - 0.25 0.25 0.25 0.25
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] [17] Instructions:
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.34 - - - - - - - - - - - - - ld1 { v0.1d }, [sp]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z0.d, z0.d, z0.d
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER .. ld1 { v0.1d }, [sp]
+# CHECK-NEXT: [0,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,0] DeeeeeeE--R. ld1 { v0.1d }, [sp]
+# CHECK-NEXT: [1,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [2,0] DeeeeeeE--R. ld1 { v0.1d }, [sp]
+# CHECK-NEXT: [2,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [3,0] D=eeeeeeE-R. ld1 { v0.1d }, [sp]
+# CHECK-NEXT: [3,1] D=======eeER add z0.d, z0.d, z0.d
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 4 1.3 1.3 1.3 ld1 { v0.1d }, [sp]
+# CHECK-NEXT: 1. 4 7.3 0.0 0.0 add z0.d, z0.d, z0.d
+# CHECK-NEXT: 4 4.3 0.6 0.6 <total>
+
+# CHECK: [9] Code Region - insr
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 200
+# CHECK-NEXT: Total Cycles: 703
+# CHECK-NEXT: Total uOps: 300
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.43
+# CHECK-NEXT: IPC: 0.28
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 2 5 1.00 insr z0.s, w0
+# CHECK-NEXT: 1 2 0.25 add z0.s, z0.s, z0.s
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - V3UnitB
+# CHECK-NEXT: [0.1] - V3UnitB
+# CHECK-NEXT: [0.2] - V3UnitB
+# CHECK-NEXT: [1.0] - V3UnitD
+# CHECK-NEXT: [1.1] - V3UnitD
+# CHECK-NEXT: [2.0] - V3UnitFlg
+# CHECK-NEXT: [2.1] - V3UnitFlg
+# CHECK-NEXT: [2.2] - V3UnitFlg
+# CHECK-NEXT: [2.3] - V3UnitFlg
+# CHECK-NEXT: [3.0] - V3UnitL12
+# CHECK-NEXT: [3.1] - V3UnitL12
+# CHECK-NEXT: [4] - V3UnitLS0
+# CHECK-NEXT: [5] - V3UnitM0
+# CHECK-NEXT: [6] - V3UnitM1
+# CHECK-NEXT: [7] - V3UnitS0
+# CHECK-NEXT: [8] - V3UnitS1
+# CHECK-NEXT: [9] - V3UnitS2
+# CHECK-NEXT: [10] - V3UnitS3
+# CHECK-NEXT: [11] - V3UnitS4
+# CHECK-NEXT: [12] - V3UnitS5
+# CHECK-NEXT: [13] - V3UnitST1
+# CHECK-NEXT: [14] - V3UnitV0
+# CHECK-NEXT: [15] - V3UnitV1
+# CHECK-NEXT: [16] - V3UnitV2
+# CHECK-NEXT: [17] - V3UnitV3
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] [17]
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.33 1.00 0.33 0.34
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] [17] Instructions:
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - 1.00 - - insr z0.s, w0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.33 - 0.33 0.34 add z0.s, z0.s, z0.s
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeER . . . . . insr z0.s, w0
+# CHECK-NEXT: [0,1] D=====eeER. . . . . add z0.s, z0.s, z0.s
+# CHECK-NEXT: [1,0] D=======eeeeeER. . . . insr z0.s, w0
+# CHECK-NEXT: [1,1] D============eeER . . . add z0.s, z0.s, z0.s
+# CHECK-NEXT: [2,0] D==============eeeeeER . . insr z0.s, w0
+# CHECK-NEXT: [2,1] D===================eeER . . add z0.s, z0.s, z0.s
+# CHECK-NEXT: [3,0] .D====================eeeeeER . insr z0.s, w0
+# CHECK-NEXT: [3,1] .D=========================eeER add z0.s, z0.s, z0.s
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 4 11.3 0.3 0.0 insr z0.s, w0
+# CHECK-NEXT: 1. 4 16.3 0.0 0.0 add z0.s, z0.s, z0.s
+# CHECK-NEXT: 4 13.8 0.1 0.0 <total>
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-forwarding.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-forwarding.s
new file mode 100644
index 0000000..1214b15
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-forwarding.s
@@ -0,0 +1,1960 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=neoverse-v3 -mattr=+sve2-aes,+sve2-sha3,+sve2-sm4 --instruction-info=0 --resource-pressure=0 --timeline --timeline-max-iterations=2 < %s | FileCheck %s
+
+# LLVM-MCA-BEGIN madd
+mul x0, x0, x0 // producer: writes x0, which the madd instructions below read as their addend
+madd x0, x1, x2, x0
+madd x0, x1, x2, x0
+madd x0, x0, x0, x0 // here x0 is also read by both multiplicand operands, not only the addend
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN smaddl
+mul x0, x0, x0
+smaddl x0, w1, w2, x0
+smaddl x0, w1, w2, x0
+smaddl x0, w0, w0, x0
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN fmadd
+fadd d0, d0, d0
+fmadd d0, d1, d2, d0
+fmul d0, d0, d0
+fmadd d0, d1, d2, d0
+fmadd d0, d1, d2, d0
+fmadd d0, d0, d1, d2 // d0 is read as a multiplicand here; the addend is d2
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN saba
+mul v0.4s, v0.4s, v0.4s
+saba v0.4s, v1.4s, v2.4s
+saba v0.4s, v1.4s, v2.4s
+saba v0.4s, v0.4s, v1.4s
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN sdot
+mul v0.4s, v0.4s, v0.4s
+sdot v0.4s, v1.16b, v2.16b
+sdot v0.4s, v1.16b, v2.16b
+sdot v0.4s, v0.16b, v1.16b
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN smmla
+mul v0.4s, v0.4s, v0.4s
+smmla v0.4s, v1.16b, v2.16b
+smmla v0.4s, v1.16b, v2.16b
+smmla v0.4s, v0.16b, v1.16b
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN mla
+mul v0.4s, v0.4s, v0.4s
+mla v0.4s, v1.4s, v2.4s
+mla v0.4s, v1.4s, v2.4s
+mla v0.4s, v0.4s, v1.4s
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN sqrdmlah
+mul v0.4s, v0.4s, v0.4s
+sqrdmlah v0.4s, v1.4s, v2.4s
+sqrdmlah v0.4s, v1.4s, v2.4s
+sqrdmlah v0.4s, v0.4s, v1.4s
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN smlal2
+mul v0.4s, v0.4s, v0.4s
+smlal2 v0.4s, v1.8h, v2.8h
+smlal2 v0.4s, v1.8h, v2.8h
+smlal2 v0.4s, v0.8h, v1.8h
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN sadalp
+mul v0.4s, v0.4s, v0.4s
+sadalp v0.2d, v1.4s
+sadalp v0.2d, v1.4s
+sadalp v0.2d, v0.4s
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN ssra
+mul v0.4s, v0.4s, v0.4s
+ssra v0.2d, v1.2d, #1
+ssra v0.2d, v1.2d, #1
+ssra v0.2d, v0.2d, #1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN fcmla
+fmul v0.4s, v0.4s, v0.4s
+fcmla v0.2d, v1.2d, v2.2d, #90
+fcmla v0.2d, v1.2d, v2.2d, #90
+fcmla v0.2d, v0.2d, v1.2d, #90
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN fmla
+fmul v0.2d, v0.2d, v0.2d
+fmla v0.2d, v1.2d, v2.2d
+fadd v0.2d, v0.2d, v0.2d
+fmla v0.2d, v1.2d, v2.2d
+fmla v0.2d, v1.2d, v2.2d
+fmla v0.2d, v0.2d, v1.2d
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN fmlal
+fmul v0.2d, v0.2d, v0.2d
+fmlal v0.4s, v1.4h, v2.4h
+fadd v0.2d, v0.2d, v0.2d
+fmlal v0.4s, v1.4h, v2.4h
+fmlal v0.4s, v1.4h, v2.4h
+fmlal v0.4s, v0.4h, v1.4h
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN bfdot
+fmul v0.2d, v0.2d, v0.2d
+bfdot v0.4s, v1.8h, v2.8h
+bfdot v0.4s, v1.8h, v2.8h
+bfdot v0.4s, v0.8h, v1.8h
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN bfmmla
+fmul v0.2d, v0.2d, v0.2d
+bfmmla v0.4s, v1.8h, v2.8h
+bfmmla v0.4s, v1.8h, v2.8h
+bfmmla v0.4s, v0.8h, v1.8h
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN bfmlalb
+fmul v0.2d, v0.2d, v0.2d
+bfmlalb v0.4s, v1.8h, v2.8h
+bfmlalb v0.4s, v1.8h, v2.8h
+bfmlalb v0.4s, v0.8h, v1.8h
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN crc32b
+mul w0, w0, w0
+crc32b w0, w0, w1
+crc32b w0, w0, w1
+crc32b w0, w0, w0
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z saba
+mul z0.d, z0.d, z0.d
+saba z0.d, z1.d, z2.d
+saba z0.d, z1.d, z2.d
+saba z0.d, z0.d, z1.d
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z sadalp
+mul z0.d, z0.d, z0.d
+sadalp z0.d, p0/m, z1.s
+sadalp z0.d, p0/m, z1.s
+sadalp z0.d, p0/m, z0.s
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z ssra
+mul z0.d, z0.d, z0.d
+ssra z0.d, z1.d, #1
+ssra z0.d, z1.d, #1
+ssra z0.d, z0.d, #1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z cdot.s
+mul z0.d, z0.d, z0.d
+cdot z0.s, z1.b, z2.b, #90
+cdot z0.s, z1.b, z2.b, #90
+cdot z0.s, z0.b, z1.b, #90
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z cdot.d
+mul z0.d, z0.d, z0.d
+cdot z0.d, z1.h, z2.h, #90
+cdot z0.d, z1.h, z2.h, #90
+cdot z0.d, z0.h, z1.h, #90
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z cmla.b
+mul z0.d, z0.d, z0.d
+cmla z0.b, z1.b, z2.b, #90
+cmla z0.b, z1.b, z2.b, #90
+cmla z0.b, z0.b, z1.b, #90
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z cmla.d
+mul z0.d, z0.d, z0.d
+cmla z0.d, z1.d, z2.d, #90
+cmla z0.d, z1.d, z2.d, #90
+cmla z0.d, z0.d, z1.d, #90
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z sdot.s
+mul z0.d, z0.d, z0.d
+sdot z0.s, z1.b, z2.b
+sdot z0.s, z1.b, z2.b
+sdot z0.s, z0.b, z1.b
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z sudot
+mul z0.d, z0.d, z0.d
+sdot z0.s, z1.b, z2.b[1] // NOTE(review): region is named "Z sudot" but exercises indexed sdot, not sudot - confirm intent
+sdot z0.s, z1.b, z2.b[1]
+sdot z0.s, z0.b, z1.b[1]
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z sdot.d
+mul z0.d, z0.d, z0.d
+sdot z0.d, z1.h, z2.h
+sdot z0.d, z1.h, z2.h
+sdot z0.d, z0.h, z1.h
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z smmla
+mul z0.s, z0.s, z0.s
+smmla z0.s, z1.b, z2.b
+smmla z0.s, z1.b, z2.b
+smmla z0.s, z0.b, z1.b
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z mla.b
+mul z0.d, z0.d, z0.d
+mla z0.b, p0/m, z1.b, z2.b
+mla z0.b, p0/m, z1.b, z2.b
+mla z0.b, p0/m, z0.b, z1.b
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z mla.d
+mul z0.d, z0.d, z0.d
+mla z0.d, p0/m, z1.d, z2.d
+mla z0.d, p0/m, z1.d, z2.d
+mla z0.d, p0/m, z0.d, z1.d
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z smlalb
+mul z0.d, z0.d, z0.d
+smlalb z0.d, z1.s, z2.s
+smlalb z0.d, z1.s, z2.s
+smlalb z0.d, z0.s, z1.s
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z sqdmlalb
+mul z0.d, z0.d, z0.d
+sqdmlalb z0.d, z1.s, z2.s
+sqdmlalb z0.d, z1.s, z2.s
+sqdmlalb z0.d, z0.s, z1.s
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z sqrdmlah.b
+mul z0.d, z0.d, z0.d
+sqrdmlah z0.b, z1.b, z2.b
+sqrdmlah z0.b, z1.b, z2.b
+sqrdmlah z0.b, z0.b, z1.b
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z sqrdmlah.d
+mul z0.d, z0.d, z0.d
+sqrdmlah z0.d, z1.d, z2.d
+sqrdmlah z0.d, z1.d, z2.d
+sqrdmlah z0.d, z0.d, z1.d
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z fcmla ZPmZZ
+fmul z0.d, z0.d, z0.d
+fcmla z0.d, p0/m, z1.d, z2.d, 90
+fcmla z0.d, p0/m, z1.d, z2.d, 90
+fcmla z0.d, p0/m, z0.d, z1.d, 90
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z fcmla ZZZI
+fmul z0.d, z0.d, z0.d
+fcmla z0.s, z1.s, z2.s[1], 90
+fcmla z0.s, z1.s, z2.s[1], 90
+fcmla z0.s, z0.s, z1.s[1], 90
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z fmla ZPmZZ
+fmul z0.d, z0.d, z0.d
+fmla z0.d, p0/m, z1.d, z2.d
+fmla z0.d, p0/m, z1.d, z2.d
+fmla z0.d, p0/m, z0.d, z1.d
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z fmla ZZZI
+fmul z0.d, z0.d, z0.d
+fmla z0.d, z1.d, z2.d[1]
+fmla z0.d, z1.d, z2.d[1]
+fmla z0.d, z0.d, z1.d[1]
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z fmlalb ZZZ
+fmul z0.d, z0.d, z0.d
+fmlalb z0.s, z1.h, z2.h
+fmlalb z0.s, z1.h, z2.h
+fmlalb z0.s, z0.h, z1.h
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z bfdot
+fmul z0.d, z0.d, z0.d
+bfdot z0.s, z1.h, z2.h
+bfdot z0.s, z1.h, z2.h
+bfdot z0.s, z0.h, z1.h
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z bfmmla
+fmul z0.d, z0.d, z0.d
+bfmmla z0.s, z1.h, z2.h
+bfmmla z0.s, z1.h, z2.h
+bfmmla z0.s, z0.h, z1.h
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN bfmlalb
+fmul z0.d, z0.d, z0.d // NOTE(review): this SVE region reuses the name "bfmlalb" of the NEON region above; the other SVE regions use a "Z " prefix - confirm
+bfmlalb z0.s, z1.h, z2.h
+bfmlalb z0.s, z1.h, z2.h
+bfmlalb z0.s, z0.h, z1.h
+# LLVM-MCA-END
+
+# CHECK: [0] Code Region - madd
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 205
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.95
+# CHECK-NEXT: IPC: 1.95
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeeER. . mul x0, x0, x0
+# CHECK-NEXT: [0,1] D=eeER . madd x0, x1, x2, x0
+# CHECK-NEXT: [0,2] DeeE-R . madd x0, x1, x2, x0
+# CHECK-NEXT: [0,3] D==eeER . madd x0, x0, x0, x0
+# CHECK-NEXT: [1,0] D====eeER mul x0, x0, x0
+# CHECK-NEXT: [1,1] D==eeE--R madd x0, x1, x2, x0
+# CHECK-NEXT: [1,2] D=eeE---R madd x0, x1, x2, x0
+# CHECK-NEXT: [1,3] D===eeE-R madd x0, x0, x0, x0
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 3.0 0.5 0.0 mul x0, x0, x0
+# CHECK-NEXT: 1. 2 2.5 2.5 1.0 madd x0, x1, x2, x0
+# CHECK-NEXT: 2. 2 1.5 1.5 2.0 madd x0, x1, x2, x0
+# CHECK-NEXT: 3. 2 3.5 0.0 0.5 madd x0, x0, x0, x0
+# CHECK-NEXT: 2 2.6 1.1 0.9 <total>
+
+# CHECK: [1] Code Region - smaddl
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 803
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012345678
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . . mul x0, x0, x0
+# CHECK-NEXT: [0,1] D==eeER . . . smaddl x0, w1, w2, x0
+# CHECK-NEXT: [0,2] D====eeER . . . smaddl x0, w1, w2, x0
+# CHECK-NEXT: [0,3] D======eeER . . smaddl x0, w0, w0, x0
+# CHECK-NEXT: [1,0] D========eeER . . mul x0, x0, x0
+# CHECK-NEXT: [1,1] D==========eeER. . smaddl x0, w1, w2, x0
+# CHECK-NEXT: [1,2] D============eeER . smaddl x0, w1, w2, x0
+# CHECK-NEXT: [1,3] D==============eeER smaddl x0, w0, w0, x0
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 5.0 0.5 0.0 mul x0, x0, x0
+# CHECK-NEXT: 1. 2 7.0 0.0 0.0 smaddl x0, w1, w2, x0
+# CHECK-NEXT: 2. 2 9.0 0.0 0.0 smaddl x0, w1, w2, x0
+# CHECK-NEXT: 3. 2 11.0 0.0 0.0 smaddl x0, w0, w0, x0
+# CHECK-NEXT: 2 8.0 0.1 0.0 <total>
+
+# CHECK: [2] Code Region - fmadd
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 600
+# CHECK-NEXT: Total Cycles: 1703
+# CHECK-NEXT: Total uOps: 600
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.35
+# CHECK-NEXT: IPC: 0.35
+# CHECK-NEXT: Block RThroughput: 1.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeER. . . . . . .. fadd d0, d0, d0
+# CHECK-NEXT: [0,1] D==eeeeER . . . . . .. fmadd d0, d1, d2, d0
+# CHECK-NEXT: [0,2] D======eeeER . . . . .. fmul d0, d0, d0
+# CHECK-NEXT: [0,3] D=======eeeeER . . . . .. fmadd d0, d1, d2, d0
+# CHECK-NEXT: [0,4] D=========eeeeER . . . .. fmadd d0, d1, d2, d0
+# CHECK-NEXT: [0,5] D=============eeeeER. . . .. fmadd d0, d0, d1, d2
+# CHECK-NEXT: [1,0] D=================eeER . . .. fadd d0, d0, d0
+# CHECK-NEXT: [1,1] D===================eeeeER . .. fmadd d0, d1, d2, d0
+# CHECK-NEXT: [1,2] D=======================eeeER . .. fmul d0, d0, d0
+# CHECK-NEXT: [1,3] D========================eeeeER .. fmadd d0, d1, d2, d0
+# CHECK-NEXT: [1,4] .D=========================eeeeER .. fmadd d0, d1, d2, d0
+# CHECK-NEXT: [1,5] .D=============================eeeeER fmadd d0, d0, d1, d2
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 9.5 0.5 0.0 fadd d0, d0, d0
+# CHECK-NEXT: 1. 2 11.5 0.0 0.0 fmadd d0, d1, d2, d0
+# CHECK-NEXT: 2. 2 15.5 0.0 0.0 fmul d0, d0, d0
+# CHECK-NEXT: 3. 2 16.5 0.0 0.0 fmadd d0, d1, d2, d0
+# CHECK-NEXT: 4. 2 18.0 0.0 0.0 fmadd d0, d1, d2, d0
+# CHECK-NEXT: 5. 2 22.0 0.0 0.0 fmadd d0, d0, d1, d2
+# CHECK-NEXT: 2 15.5 0.1 0.0 <total>
+
+# CHECK: [3] Code Region - saba
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1303
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.31
+# CHECK-NEXT: IPC: 0.31
+# CHECK-NEXT: Block RThroughput: 0.8
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeeeeER . . . . . mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: [0,1] D====eeeeER . . . . saba v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: [0,2] D=====eeeeER . . . . saba v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: [0,3] D=========eeeeER . . . saba v0.4s, v0.4s, v1.4s
+# CHECK-NEXT: [1,0] D=============eeeeER. . . mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: [1,1] D=================eeeeER . . saba v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: [1,2] D==================eeeeER. . saba v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: [1,3] D======================eeeeER saba v0.4s, v0.4s, v1.4s
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 7.5 0.5 0.0 mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1. 2 11.5 0.0 0.0 saba v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: 2. 2 12.5 0.0 0.0 saba v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: 3. 2 16.5 0.0 0.0 saba v0.4s, v0.4s, v1.4s
+# CHECK-NEXT: 2 12.0 0.1 0.0 <total>
+
+# CHECK: [4] Code Region - sdot
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1103
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.36
+# CHECK-NEXT: IPC: 0.36
+# CHECK-NEXT: Block RThroughput: 0.8
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234
+
+# CHECK: [0,0] DeeeeER . . . . mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: [0,1] D====eeeER. . . . sdot v0.4s, v1.16b, v2.16b
+# CHECK-NEXT: [0,2] D=====eeeER . . . sdot v0.4s, v1.16b, v2.16b
+# CHECK-NEXT: [0,3] D========eeeER . . . sdot v0.4s, v0.16b, v1.16b
+# CHECK-NEXT: [1,0] D===========eeeeER . . mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: [1,1] D===============eeeER . sdot v0.4s, v1.16b, v2.16b
+# CHECK-NEXT: [1,2] D================eeeER . sdot v0.4s, v1.16b, v2.16b
+# CHECK-NEXT: [1,3] D===================eeeER sdot v0.4s, v0.16b, v1.16b
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 6.5 0.5 0.0 mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1. 2 10.5 0.0 0.0 sdot v0.4s, v1.16b, v2.16b
+# CHECK-NEXT: 2. 2 11.5 0.0 0.0 sdot v0.4s, v1.16b, v2.16b
+# CHECK-NEXT: 3. 2 14.5 0.0 0.0 sdot v0.4s, v0.16b, v1.16b
+# CHECK-NEXT: 2 10.8 0.1 0.0 <total>
+
+# CHECK: [5] Code Region - smmla
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1103
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.36
+# CHECK-NEXT: IPC: 0.36
+# CHECK-NEXT: Block RThroughput: 0.8
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234
+
+# CHECK: [0,0] DeeeeER . . . . mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: [0,1] D====eeeER. . . . smmla v0.4s, v1.16b, v2.16b
+# CHECK-NEXT: [0,2] D=====eeeER . . . smmla v0.4s, v1.16b, v2.16b
+# CHECK-NEXT: [0,3] D========eeeER . . . smmla v0.4s, v0.16b, v1.16b
+# CHECK-NEXT: [1,0] D===========eeeeER . . mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: [1,1] D===============eeeER . smmla v0.4s, v1.16b, v2.16b
+# CHECK-NEXT: [1,2] D================eeeER . smmla v0.4s, v1.16b, v2.16b
+# CHECK-NEXT: [1,3] D===================eeeER smmla v0.4s, v0.16b, v1.16b
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 6.5 0.5 0.0 mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1. 2 10.5 0.0 0.0 smmla v0.4s, v1.16b, v2.16b
+# CHECK-NEXT: 2. 2 11.5 0.0 0.0 smmla v0.4s, v1.16b, v2.16b
+# CHECK-NEXT: 3. 2 14.5 0.0 0.0 smmla v0.4s, v0.16b, v1.16b
+# CHECK-NEXT: 2 10.8 0.1 0.0 <total>
+
+# CHECK: [6] Code Region - mla
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1303
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.31
+# CHECK-NEXT: IPC: 0.31
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeeeeER . . . . . mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: [0,1] D====eeeeER . . . . mla v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: [0,2] D=====eeeeER . . . . mla v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: [0,3] D=========eeeeER . . . mla v0.4s, v0.4s, v1.4s
+# CHECK-NEXT: [1,0] D=============eeeeER. . . mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: [1,1] D=================eeeeER . . mla v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: [1,2] D==================eeeeER. . mla v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: [1,3] D======================eeeeER mla v0.4s, v0.4s, v1.4s
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 7.5 0.5 0.0 mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1. 2 11.5 0.0 0.0 mla v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: 2. 2 12.5 0.0 0.0 mla v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: 3. 2 16.5 0.0 0.0 mla v0.4s, v0.4s, v1.4s
+# CHECK-NEXT: 2 12.0 0.1 0.0 <total>
+
+# CHECK: [7] Code Region - sqrdmlah
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1403
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.29
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 3.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeER . . . . . mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: [0,1] D====eeeeER . . . . sqrdmlah v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: [0,2] D======eeeeER . . . . sqrdmlah v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: [0,3] D==========eeeeER . . . sqrdmlah v0.4s, v0.4s, v1.4s
+# CHECK-NEXT: [1,0] D==============eeeeER . . mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: [1,1] D==================eeeeER. . sqrdmlah v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: [1,2] D====================eeeeER . sqrdmlah v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: [1,3] D========================eeeeER sqrdmlah v0.4s, v0.4s, v1.4s
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 8.0 0.5 0.0 mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1. 2 12.0 0.0 0.0 sqrdmlah v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: 2. 2 14.0 0.0 0.0 sqrdmlah v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: 3. 2 18.0 0.0 0.0 sqrdmlah v0.4s, v0.4s, v1.4s
+# CHECK-NEXT: 2 13.0 0.1 0.0 <total>
+
+# CHECK: [8] Code Region - smlal2
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1303
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.31
+# CHECK-NEXT: IPC: 0.31
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeeeeER . . . . . mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: [0,1] D====eeeeER . . . . smlal2 v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: [0,2] D=====eeeeER . . . . smlal2 v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: [0,3] D=========eeeeER . . . smlal2 v0.4s, v0.8h, v1.8h
+# CHECK-NEXT: [1,0] D=============eeeeER. . . mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: [1,1] D=================eeeeER . . smlal2 v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: [1,2] D==================eeeeER. . smlal2 v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: [1,3] D======================eeeeER smlal2 v0.4s, v0.8h, v1.8h
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 7.5 0.5 0.0 mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1. 2 11.5 0.0 0.0 smlal2 v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: 2. 2 12.5 0.0 0.0 smlal2 v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: 3. 2 16.5 0.0 0.0 smlal2 v0.4s, v0.8h, v1.8h
+# CHECK-NEXT: 2 12.0 0.1 0.0 <total>
+
+# CHECK: [9] Code Region - sadalp
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1303
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.31
+# CHECK-NEXT: IPC: 0.31
+# CHECK-NEXT: Block RThroughput: 0.8
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeeeeER . . . . . mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: [0,1] D====eeeeER . . . . sadalp v0.2d, v1.4s
+# CHECK-NEXT: [0,2] D=====eeeeER . . . . sadalp v0.2d, v1.4s
+# CHECK-NEXT: [0,3] D=========eeeeER . . . sadalp v0.2d, v0.4s
+# CHECK-NEXT: [1,0] D=============eeeeER. . . mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: [1,1] D=================eeeeER . . sadalp v0.2d, v1.4s
+# CHECK-NEXT: [1,2] D==================eeeeER. . sadalp v0.2d, v1.4s
+# CHECK-NEXT: [1,3] D======================eeeeER sadalp v0.2d, v0.4s
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 7.5 0.5 0.0 mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1. 2 11.5 0.0 0.0 sadalp v0.2d, v1.4s
+# CHECK-NEXT: 2. 2 12.5 0.0 0.0 sadalp v0.2d, v1.4s
+# CHECK-NEXT: 3. 2 16.5 0.0 0.0 sadalp v0.2d, v0.4s
+# CHECK-NEXT: 2 12.0 0.1 0.0 <total>
+
+# CHECK: [10] Code Region - ssra
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1303
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.31
+# CHECK-NEXT: IPC: 0.31
+# CHECK-NEXT: Block RThroughput: 0.8
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeeeeER . . . . . mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: [0,1] D====eeeeER . . . . ssra v0.2d, v1.2d, #1
+# CHECK-NEXT: [0,2] D=====eeeeER . . . . ssra v0.2d, v1.2d, #1
+# CHECK-NEXT: [0,3] D=========eeeeER . . . ssra v0.2d, v0.2d, #1
+# CHECK-NEXT: [1,0] D=============eeeeER. . . mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: [1,1] D=================eeeeER . . ssra v0.2d, v1.2d, #1
+# CHECK-NEXT: [1,2] D==================eeeeER. . ssra v0.2d, v1.2d, #1
+# CHECK-NEXT: [1,3] D======================eeeeER ssra v0.2d, v0.2d, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 7.5 0.5 0.0 mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1. 2 11.5 0.0 0.0 ssra v0.2d, v1.2d, #1
+# CHECK-NEXT: 2. 2 12.5 0.0 0.0 ssra v0.2d, v1.2d, #1
+# CHECK-NEXT: 3. 2 16.5 0.0 0.0 ssra v0.2d, v0.2d, #1
+# CHECK-NEXT: 2 12.0 0.1 0.0 <total>
+
+# CHECK: [11] Code Region - fcmla
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1303
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.31
+# CHECK-NEXT: IPC: 0.31
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeeeER . . . . . fmul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: [0,1] D===eeeeER. . . . . fcmla v0.2d, v1.2d, v2.2d, #90
+# CHECK-NEXT: [0,2] D=====eeeeER . . . . fcmla v0.2d, v1.2d, v2.2d, #90
+# CHECK-NEXT: [0,3] D=========eeeeER . . . fcmla v0.2d, v0.2d, v1.2d, #90
+# CHECK-NEXT: [1,0] D=============eeeER . . . fmul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: [1,1] D================eeeeER . . fcmla v0.2d, v1.2d, v2.2d, #90
+# CHECK-NEXT: [1,2] D==================eeeeER. . fcmla v0.2d, v1.2d, v2.2d, #90
+# CHECK-NEXT: [1,3] D======================eeeeER fcmla v0.2d, v0.2d, v1.2d, #90
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 7.5 0.5 0.0 fmul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1. 2 10.5 0.0 0.0 fcmla v0.2d, v1.2d, v2.2d, #90
+# CHECK-NEXT: 2. 2 12.5 0.0 0.0 fcmla v0.2d, v1.2d, v2.2d, #90
+# CHECK-NEXT: 3. 2 16.5 0.0 0.0 fcmla v0.2d, v0.2d, v1.2d, #90
+# CHECK-NEXT: 2 11.8 0.1 0.0 <total>
+
+# CHECK: [12] Code Region - fmla
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 600
+# CHECK-NEXT: Total Cycles: 1703
+# CHECK-NEXT: Total uOps: 600
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.35
+# CHECK-NEXT: IPC: 0.35
+# CHECK-NEXT: Block RThroughput: 1.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeER . . . . . .. fmul v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: [0,1] D=eeeeER . . . . . .. fmla v0.2d, v1.2d, v2.2d
+# CHECK-NEXT: [0,2] D=====eeER. . . . . .. fadd v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: [0,3] D=======eeeeER . . . . .. fmla v0.2d, v1.2d, v2.2d
+# CHECK-NEXT: [0,4] D=========eeeeER . . . .. fmla v0.2d, v1.2d, v2.2d
+# CHECK-NEXT: [0,5] D=============eeeeER. . . .. fmla v0.2d, v0.2d, v1.2d
+# CHECK-NEXT: [1,0] D=================eeeER . . .. fmul v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: [1,1] D==================eeeeER. . .. fmla v0.2d, v1.2d, v2.2d
+# CHECK-NEXT: [1,2] D======================eeER . .. fadd v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: [1,3] D========================eeeeER .. fmla v0.2d, v1.2d, v2.2d
+# CHECK-NEXT: [1,4] .D=========================eeeeER .. fmla v0.2d, v1.2d, v2.2d
+# CHECK-NEXT: [1,5] .D=============================eeeeER fmla v0.2d, v0.2d, v1.2d
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 9.5 0.5 0.0 fmul v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1. 2 10.5 0.0 0.0 fmla v0.2d, v1.2d, v2.2d
+# CHECK-NEXT: 2. 2 14.5 0.0 0.0 fadd v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 3. 2 16.5 0.0 0.0 fmla v0.2d, v1.2d, v2.2d
+# CHECK-NEXT: 4. 2 18.0 0.0 0.0 fmla v0.2d, v1.2d, v2.2d
+# CHECK-NEXT: 5. 2 22.0 0.0 0.0 fmla v0.2d, v0.2d, v1.2d
+# CHECK-NEXT: 2 15.2 0.1 0.0 <total>
+
+# CHECK: [13] Code Region - fmlal
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 600
+# CHECK-NEXT: Total Cycles: 1903
+# CHECK-NEXT: Total uOps: 600
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.32
+# CHECK-NEXT: IPC: 0.32
+# CHECK-NEXT: Block RThroughput: 1.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0
+
+# CHECK: [0,0] DeeeER . . . . . . . fmul v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: [0,1] D===eeeeER. . . . . . . fmlal v0.4s, v1.4h, v2.4h
+# CHECK-NEXT: [0,2] D=======eeER . . . . . . fadd v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: [0,3] D=========eeeeER . . . . . fmlal v0.4s, v1.4h, v2.4h
+# CHECK-NEXT: [0,4] D===========eeeeER . . . . . fmlal v0.4s, v1.4h, v2.4h
+# CHECK-NEXT: [0,5] D===============eeeeER . . . . fmlal v0.4s, v0.4h, v1.4h
+# CHECK-NEXT: [1,0] D===================eeeER. . . . fmul v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: [1,1] D======================eeeeER . . . fmlal v0.4s, v1.4h, v2.4h
+# CHECK-NEXT: [1,2] D==========================eeER . . fadd v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: [1,3] D============================eeeeER. . fmlal v0.4s, v1.4h, v2.4h
+# CHECK-NEXT: [1,4] .D=============================eeeeER . fmlal v0.4s, v1.4h, v2.4h
+# CHECK-NEXT: [1,5] .D=================================eeeeER fmlal v0.4s, v0.4h, v1.4h
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 10.5 0.5 0.0 fmul v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1. 2 13.5 0.0 0.0 fmlal v0.4s, v1.4h, v2.4h
+# CHECK-NEXT: 2. 2 17.5 0.0 0.0 fadd v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 3. 2 19.5 0.0 0.0 fmlal v0.4s, v1.4h, v2.4h
+# CHECK-NEXT: 4. 2 21.0 0.0 0.0 fmlal v0.4s, v1.4h, v2.4h
+# CHECK-NEXT: 5. 2 25.0 0.0 0.0 fmlal v0.4s, v0.4h, v1.4h
+# CHECK-NEXT: 2 17.8 0.1 0.0 <total>
+
+# CHECK: [14] Code Region - bfdot
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1603
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.25
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 01234
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeER . . . . . . fmul v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: [0,1] D===eeeeeER . . . . . bfdot v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: [0,2] D======eeeeeER . . . . . bfdot v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: [0,3] D===========eeeeeER . . . . bfdot v0.4s, v0.8h, v1.8h
+# CHECK-NEXT: [1,0] D================eeeER . . . fmul v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: [1,1] D===================eeeeeER . . bfdot v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: [1,2] D======================eeeeeER. . bfdot v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: [1,3] D===========================eeeeeER bfdot v0.4s, v0.8h, v1.8h
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 9.0 0.5 0.0 fmul v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1. 2 12.0 0.0 0.0 bfdot v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: 2. 2 15.0 0.0 0.0 bfdot v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: 3. 2 20.0 0.0 0.0 bfdot v0.4s, v0.8h, v1.8h
+# CHECK-NEXT: 2 14.0 0.1 0.0 <total>
+
+# CHECK: [15] Code Region - bfmmla
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1903
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.21
+# CHECK-NEXT: IPC: 0.21
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0
+
+# CHECK: [0,0] DeeeER . . . . . . . fmul v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: [0,1] D===eeeeeeER . . . . . . bfmmla v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: [0,2] D=======eeeeeeER . . . . . bfmmla v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: [0,3] D=============eeeeeeER . . . . bfmmla v0.4s, v0.8h, v1.8h
+# CHECK-NEXT: [1,0] D===================eeeER. . . . fmul v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: [1,1] D======================eeeeeeER . . bfmmla v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: [1,2] D==========================eeeeeeER. . bfmmla v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: [1,3] D================================eeeeeeER bfmmla v0.4s, v0.8h, v1.8h
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 10.5 0.5 0.0 fmul v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1. 2 13.5 0.0 0.0 bfmmla v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: 2. 2 17.5 0.0 0.0 bfmmla v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: 3. 2 23.5 0.0 0.0 bfmmla v0.4s, v0.8h, v1.8h
+# CHECK-NEXT: 2 16.3 0.1 0.0 <total>
+
+# CHECK: [16] Code Region - bfmlalb
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1503
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.27
+# CHECK-NEXT: IPC: 0.27
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeER . . . . . . fmul v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: [0,1] D===eeeeeER . . . . . bfmlalb v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: [0,2] D=====eeeeeER . . . . . bfmlalb v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: [0,3] D==========eeeeeER . . . . bfmlalb v0.4s, v0.8h, v1.8h
+# CHECK-NEXT: [1,0] D===============eeeER . . . fmul v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: [1,1] D==================eeeeeER . . bfmlalb v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: [1,2] D====================eeeeeER . . bfmlalb v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: [1,3] D=========================eeeeeER bfmlalb v0.4s, v0.8h, v1.8h
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 8.5 0.5 0.0 fmul v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1. 2 11.5 0.0 0.0 bfmlalb v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: 2. 2 13.5 0.0 0.0 bfmlalb v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: 3. 2 18.5 0.0 0.0 bfmlalb v0.4s, v0.8h, v1.8h
+# CHECK-NEXT: 2 13.0 0.1 0.0 <total>
+
+# CHECK: [17] Code Region - crc32b
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 703
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.57
+# CHECK-NEXT: IPC: 0.57
+# CHECK-NEXT: Block RThroughput: 3.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . .. mul w0, w0, w0
+# CHECK-NEXT: [0,1] D==eeER . .. crc32b w0, w0, w1
+# CHECK-NEXT: [0,2] D===eeER . .. crc32b w0, w0, w1
+# CHECK-NEXT: [0,3] D=====eeER. .. crc32b w0, w0, w0
+# CHECK-NEXT: [1,0] D=======eeER .. mul w0, w0, w0
+# CHECK-NEXT: [1,1] D=========eeER .. crc32b w0, w0, w1
+# CHECK-NEXT: [1,2] D==========eeER.. crc32b w0, w0, w1
+# CHECK-NEXT: [1,3] D============eeER crc32b w0, w0, w0
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 4.5 0.5 0.0 mul w0, w0, w0
+# CHECK-NEXT: 1. 2 6.5 0.0 0.0 crc32b w0, w0, w1
+# CHECK-NEXT: 2. 2 7.5 0.0 0.0 crc32b w0, w0, w1
+# CHECK-NEXT: 3. 2 9.5 0.0 0.0 crc32b w0, w0, w0
+# CHECK-NEXT: 2 7.0 0.1 0.0 <total>
+
+# CHECK: [18] Code Region - Z saba
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1403
+# CHECK-NEXT: Total uOps: 500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.36
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeER . . . . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D=====eeeeER . . . . saba z0.d, z1.d, z2.d
+# CHECK-NEXT: [0,2] D======eeeeER . . . . saba z0.d, z1.d, z2.d
+# CHECK-NEXT: [0,3] D==========eeeeER . . . saba z0.d, z0.d, z1.d
+# CHECK-NEXT: [1,0] D==============eeeeeER . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D===================eeeeER . saba z0.d, z1.d, z2.d
+# CHECK-NEXT: [1,2] D====================eeeeER . saba z0.d, z1.d, z2.d
+# CHECK-NEXT: [1,3] D========================eeeeER saba z0.d, z0.d, z1.d
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 8.0 0.5 0.0 mul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 13.0 0.0 0.0 saba z0.d, z1.d, z2.d
+# CHECK-NEXT: 2. 2 14.0 0.0 0.0 saba z0.d, z1.d, z2.d
+# CHECK-NEXT: 3. 2 18.0 0.0 0.0 saba z0.d, z0.d, z1.d
+# CHECK-NEXT: 2 13.3 0.1 0.0 <total>
+
+# CHECK: [19] Code Region - Z sadalp
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1403
+# CHECK-NEXT: Total uOps: 500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.36
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeER . . . . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D=====eeeeER . . . . sadalp z0.d, p0/m, z1.s
+# CHECK-NEXT: [0,2] D======eeeeER . . . . sadalp z0.d, p0/m, z1.s
+# CHECK-NEXT: [0,3] D==========eeeeER . . . sadalp z0.d, p0/m, z0.s
+# CHECK-NEXT: [1,0] D==============eeeeeER . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D===================eeeeER . sadalp z0.d, p0/m, z1.s
+# CHECK-NEXT: [1,2] D====================eeeeER . sadalp z0.d, p0/m, z1.s
+# CHECK-NEXT: [1,3] D========================eeeeER sadalp z0.d, p0/m, z0.s
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 8.0 0.5 0.0 mul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 13.0 0.0 0.0 sadalp z0.d, p0/m, z1.s
+# CHECK-NEXT: 2. 2 14.0 0.0 0.0 sadalp z0.d, p0/m, z1.s
+# CHECK-NEXT: 3. 2 18.0 0.0 0.0 sadalp z0.d, p0/m, z0.s
+# CHECK-NEXT: 2 13.3 0.1 0.0 <total>
+
+# CHECK: [20] Code Region - Z ssra
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1403
+# CHECK-NEXT: Total uOps: 500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.36
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 1.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeER . . . . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D=====eeeeER . . . . ssra z0.d, z1.d, #1
+# CHECK-NEXT: [0,2] D======eeeeER . . . . ssra z0.d, z1.d, #1
+# CHECK-NEXT: [0,3] D==========eeeeER . . . ssra z0.d, z0.d, #1
+# CHECK-NEXT: [1,0] D==============eeeeeER . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D===================eeeeER . ssra z0.d, z1.d, #1
+# CHECK-NEXT: [1,2] D====================eeeeER . ssra z0.d, z1.d, #1
+# CHECK-NEXT: [1,3] D========================eeeeER ssra z0.d, z0.d, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 8.0 0.5 0.0 mul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 13.0 0.0 0.0 ssra z0.d, z1.d, #1
+# CHECK-NEXT: 2. 2 14.0 0.0 0.0 ssra z0.d, z1.d, #1
+# CHECK-NEXT: 3. 2 18.0 0.0 0.0 ssra z0.d, z0.d, #1
+# CHECK-NEXT: 2 13.3 0.1 0.0 <total>
+
+# CHECK: [21] Code Region - Z cdot.s
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1203
+# CHECK-NEXT: Total uOps: 500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.42
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456
+
+# CHECK: [0,0] DeeeeeER . . . .. mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D=====eeeER . . .. cdot z0.s, z1.b, z2.b, #90
+# CHECK-NEXT: [0,2] D======eeeER . . .. cdot z0.s, z1.b, z2.b, #90
+# CHECK-NEXT: [0,3] D=========eeeER. . .. cdot z0.s, z0.b, z1.b, #90
+# CHECK-NEXT: [1,0] D============eeeeeER. .. mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D=================eeeER .. cdot z0.s, z1.b, z2.b, #90
+# CHECK-NEXT: [1,2] D==================eeeER .. cdot z0.s, z1.b, z2.b, #90
+# CHECK-NEXT: [1,3] D=====================eeeER cdot z0.s, z0.b, z1.b, #90
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 7.0 0.5 0.0 mul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 12.0 0.0 0.0 cdot z0.s, z1.b, z2.b, #90
+# CHECK-NEXT: 2. 2 13.0 0.0 0.0 cdot z0.s, z1.b, z2.b, #90
+# CHECK-NEXT: 3. 2 16.0 0.0 0.0 cdot z0.s, z0.b, z1.b, #90
+# CHECK-NEXT: 2 12.0 0.1 0.0 <total>
+
+# CHECK: [22] Code Region - Z cdot.d
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1203
+# CHECK-NEXT: Total uOps: 500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.42
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456
+
+# CHECK: [0,0] DeeeeeER . . . .. mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D=====eeeER . . .. cdot z0.d, z1.h, z2.h, #90
+# CHECK-NEXT: [0,2] D======eeeER . . .. cdot z0.d, z1.h, z2.h, #90
+# CHECK-NEXT: [0,3] D=========eeeER. . .. cdot z0.d, z0.h, z1.h, #90
+# CHECK-NEXT: [1,0] D============eeeeeER. .. mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D=================eeeER .. cdot z0.d, z1.h, z2.h, #90
+# CHECK-NEXT: [1,2] D==================eeeER .. cdot z0.d, z1.h, z2.h, #90
+# CHECK-NEXT: [1,3] D=====================eeeER cdot z0.d, z0.h, z1.h, #90
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 7.0 0.5 0.0 mul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 12.0 0.0 0.0 cdot z0.d, z1.h, z2.h, #90
+# CHECK-NEXT: 2. 2 13.0 0.0 0.0 cdot z0.d, z1.h, z2.h, #90
+# CHECK-NEXT: 3. 2 16.0 0.0 0.0 cdot z0.d, z0.h, z1.h, #90
+# CHECK-NEXT: 2 12.0 0.1 0.0 <total>
+
+# CHECK: [23] Code Region - Z cmla.b
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1403
+# CHECK-NEXT: Total uOps: 500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.36
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeER . . . . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D=====eeeeER . . . . cmla z0.b, z1.b, z2.b, #90
+# CHECK-NEXT: [0,2] D======eeeeER . . . . cmla z0.b, z1.b, z2.b, #90
+# CHECK-NEXT: [0,3] D==========eeeeER . . . cmla z0.b, z0.b, z1.b, #90
+# CHECK-NEXT: [1,0] D==============eeeeeER . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D===================eeeeER . cmla z0.b, z1.b, z2.b, #90
+# CHECK-NEXT: [1,2] D====================eeeeER . cmla z0.b, z1.b, z2.b, #90
+# CHECK-NEXT: [1,3] D========================eeeeER cmla z0.b, z0.b, z1.b, #90
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 8.0 0.5 0.0 mul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 13.0 0.0 0.0 cmla z0.b, z1.b, z2.b, #90
+# CHECK-NEXT: 2. 2 14.0 0.0 0.0 cmla z0.b, z1.b, z2.b, #90
+# CHECK-NEXT: 3. 2 18.0 0.0 0.0 cmla z0.b, z0.b, z1.b, #90
+# CHECK-NEXT: 2 13.3 0.1 0.0 <total>
+
+# CHECK: [24] Code Region - Z cmla.d
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1803
+# CHECK-NEXT: Total uOps: 500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.28
+# CHECK-NEXT: IPC: 0.22
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345678
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeER . . . . . . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D=====eeeeeER . . . . . . cmla z0.d, z1.d, z2.d, #90
+# CHECK-NEXT: [0,2] D========eeeeeER . . . . . cmla z0.d, z1.d, z2.d, #90
+# CHECK-NEXT: [0,3] D=============eeeeeER . . . . cmla z0.d, z0.d, z1.d, #90
+# CHECK-NEXT: [1,0] D==================eeeeeER . . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D=======================eeeeeER . . cmla z0.d, z1.d, z2.d, #90
+# CHECK-NEXT: [1,2] D==========================eeeeeER . . cmla z0.d, z1.d, z2.d, #90
+# CHECK-NEXT: [1,3] D===============================eeeeeER cmla z0.d, z0.d, z1.d, #90
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 10.0 0.5 0.0 mul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 15.0 0.0 0.0 cmla z0.d, z1.d, z2.d, #90
+# CHECK-NEXT: 2. 2 18.0 0.0 0.0 cmla z0.d, z1.d, z2.d, #90
+# CHECK-NEXT: 3. 2 23.0 0.0 0.0 cmla z0.d, z0.d, z1.d, #90
+# CHECK-NEXT: 2 16.5 0.1 0.0 <total>
+
+# CHECK: [25] Code Region - Z sdot.s
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1203
+# CHECK-NEXT: Total uOps: 500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.42
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456
+
+# CHECK: [0,0] DeeeeeER . . . .. mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D=====eeeER . . .. sdot z0.s, z1.b, z2.b
+# CHECK-NEXT: [0,2] D======eeeER . . .. sdot z0.s, z1.b, z2.b
+# CHECK-NEXT: [0,3] D=========eeeER. . .. sdot z0.s, z0.b, z1.b
+# CHECK-NEXT: [1,0] D============eeeeeER. .. mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D=================eeeER .. sdot z0.s, z1.b, z2.b
+# CHECK-NEXT: [1,2] D==================eeeER .. sdot z0.s, z1.b, z2.b
+# CHECK-NEXT: [1,3] D=====================eeeER sdot z0.s, z0.b, z1.b
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 7.0 0.5 0.0 mul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 12.0 0.0 0.0 sdot z0.s, z1.b, z2.b
+# CHECK-NEXT: 2. 2 13.0 0.0 0.0 sdot z0.s, z1.b, z2.b
+# CHECK-NEXT: 3. 2 16.0 0.0 0.0 sdot z0.s, z0.b, z1.b
+# CHECK-NEXT: 2 12.0 0.1 0.0 <total>
+
+# CHECK: [26] Code Region - Z sudot
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1203
+# CHECK-NEXT: Total uOps: 500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.42
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456
+
+# CHECK: [0,0] DeeeeeER . . . .. mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D=====eeeER . . .. sdot z0.s, z1.b, z2.b[1]
+# CHECK-NEXT: [0,2] D======eeeER . . .. sdot z0.s, z1.b, z2.b[1]
+# CHECK-NEXT: [0,3] D=========eeeER. . .. sdot z0.s, z0.b, z1.b[1]
+# CHECK-NEXT: [1,0] D============eeeeeER. .. mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D=================eeeER .. sdot z0.s, z1.b, z2.b[1]
+# CHECK-NEXT: [1,2] D==================eeeER .. sdot z0.s, z1.b, z2.b[1]
+# CHECK-NEXT: [1,3] D=====================eeeER sdot z0.s, z0.b, z1.b[1]
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 7.0 0.5 0.0 mul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 12.0 0.0 0.0 sdot z0.s, z1.b, z2.b[1]
+# CHECK-NEXT: 2. 2 13.0 0.0 0.0 sdot z0.s, z1.b, z2.b[1]
+# CHECK-NEXT: 3. 2 16.0 0.0 0.0 sdot z0.s, z0.b, z1.b[1]
+# CHECK-NEXT: 2 12.0 0.1 0.0 <total>
+
+# CHECK: [27] Code Region - Z sdot.d
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1203
+# CHECK-NEXT: Total uOps: 500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.42
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456
+
+# CHECK: [0,0] DeeeeeER . . . .. mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D=====eeeER . . .. sdot z0.d, z1.h, z2.h
+# CHECK-NEXT: [0,2] D======eeeER . . .. sdot z0.d, z1.h, z2.h
+# CHECK-NEXT: [0,3] D=========eeeER. . .. sdot z0.d, z0.h, z1.h
+# CHECK-NEXT: [1,0] D============eeeeeER. .. mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D=================eeeER .. sdot z0.d, z1.h, z2.h
+# CHECK-NEXT: [1,2] D==================eeeER .. sdot z0.d, z1.h, z2.h
+# CHECK-NEXT: [1,3] D=====================eeeER sdot z0.d, z0.h, z1.h
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 7.0 0.5 0.0 mul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 12.0 0.0 0.0 sdot z0.d, z1.h, z2.h
+# CHECK-NEXT: 2. 2 13.0 0.0 0.0 sdot z0.d, z1.h, z2.h
+# CHECK-NEXT: 3. 2 16.0 0.0 0.0 sdot z0.d, z0.h, z1.h
+# CHECK-NEXT: 2 12.0 0.1 0.0 <total>
+
+# CHECK: [28] Code Region - Z smmla
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1103
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.36
+# CHECK-NEXT: IPC: 0.36
+# CHECK-NEXT: Block RThroughput: 0.8
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234
+
+# CHECK: [0,0] DeeeeER . . . . mul z0.s, z0.s, z0.s
+# CHECK-NEXT: [0,1] D====eeeER. . . . smmla z0.s, z1.b, z2.b
+# CHECK-NEXT: [0,2] D=====eeeER . . . smmla z0.s, z1.b, z2.b
+# CHECK-NEXT: [0,3] D========eeeER . . . smmla z0.s, z0.b, z1.b
+# CHECK-NEXT: [1,0] D===========eeeeER . . mul z0.s, z0.s, z0.s
+# CHECK-NEXT: [1,1] D===============eeeER . smmla z0.s, z1.b, z2.b
+# CHECK-NEXT: [1,2] D================eeeER . smmla z0.s, z1.b, z2.b
+# CHECK-NEXT: [1,3] D===================eeeER smmla z0.s, z0.b, z1.b
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 6.5 0.5 0.0 mul z0.s, z0.s, z0.s
+# CHECK-NEXT: 1. 2 10.5 0.0 0.0 smmla z0.s, z1.b, z2.b
+# CHECK-NEXT: 2. 2 11.5 0.0 0.0 smmla z0.s, z1.b, z2.b
+# CHECK-NEXT: 3. 2 14.5 0.0 0.0 smmla z0.s, z0.b, z1.b
+# CHECK-NEXT: 2 10.8 0.1 0.0 <total>
+
+# CHECK: [29] Code Region - Z mla.b
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1403
+# CHECK-NEXT: Total uOps: 500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.36
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeER . . . . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D=====eeeeER . . . . mla z0.b, p0/m, z1.b, z2.b
+# CHECK-NEXT: [0,2] D======eeeeER . . . . mla z0.b, p0/m, z1.b, z2.b
+# CHECK-NEXT: [0,3] D==========eeeeER . . . mla z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: [1,0] D==============eeeeeER . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D===================eeeeER . mla z0.b, p0/m, z1.b, z2.b
+# CHECK-NEXT: [1,2] D====================eeeeER . mla z0.b, p0/m, z1.b, z2.b
+# CHECK-NEXT: [1,3] D========================eeeeER mla z0.b, p0/m, z0.b, z1.b
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 8.0 0.5 0.0 mul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 13.0 0.0 0.0 mla z0.b, p0/m, z1.b, z2.b
+# CHECK-NEXT: 2. 2 14.0 0.0 0.0 mla z0.b, p0/m, z1.b, z2.b
+# CHECK-NEXT: 3. 2 18.0 0.0 0.0 mla z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 2 13.3 0.1 0.0 <total>
+
+# CHECK: [30] Code Region - Z mla.d
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1803
+# CHECK-NEXT: Total uOps: 500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.28
+# CHECK-NEXT: IPC: 0.22
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345678
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeER . . . . . . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D=====eeeeeER . . . . . . mla z0.d, p0/m, z1.d, z2.d
+# CHECK-NEXT: [0,2] D========eeeeeER . . . . . mla z0.d, p0/m, z1.d, z2.d
+# CHECK-NEXT: [0,3] D=============eeeeeER . . . . mla z0.d, p0/m, z0.d, z1.d
+# CHECK-NEXT: [1,0] D==================eeeeeER . . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D=======================eeeeeER . . mla z0.d, p0/m, z1.d, z2.d
+# CHECK-NEXT: [1,2] D==========================eeeeeER . . mla z0.d, p0/m, z1.d, z2.d
+# CHECK-NEXT: [1,3] D===============================eeeeeER mla z0.d, p0/m, z0.d, z1.d
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 10.0 0.5 0.0 mul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 15.0 0.0 0.0 mla z0.d, p0/m, z1.d, z2.d
+# CHECK-NEXT: 2. 2 18.0 0.0 0.0 mla z0.d, p0/m, z1.d, z2.d
+# CHECK-NEXT: 3. 2 23.0 0.0 0.0 mla z0.d, p0/m, z0.d, z1.d
+# CHECK-NEXT: 2 16.5 0.1 0.0 <total>
+
+# CHECK: [31] Code Region - Z smlalb
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1403
+# CHECK-NEXT: Total uOps: 500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.36
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeER . . . . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D=====eeeeER . . . . smlalb z0.d, z1.s, z2.s
+# CHECK-NEXT: [0,2] D======eeeeER . . . . smlalb z0.d, z1.s, z2.s
+# CHECK-NEXT: [0,3] D==========eeeeER . . . smlalb z0.d, z0.s, z1.s
+# CHECK-NEXT: [1,0] D==============eeeeeER . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D===================eeeeER . smlalb z0.d, z1.s, z2.s
+# CHECK-NEXT: [1,2] D====================eeeeER . smlalb z0.d, z1.s, z2.s
+# CHECK-NEXT: [1,3] D========================eeeeER smlalb z0.d, z0.s, z1.s
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 8.0 0.5 0.0 mul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 13.0 0.0 0.0 smlalb z0.d, z1.s, z2.s
+# CHECK-NEXT: 2. 2 14.0 0.0 0.0 smlalb z0.d, z1.s, z2.s
+# CHECK-NEXT: 3. 2 18.0 0.0 0.0 smlalb z0.d, z0.s, z1.s
+# CHECK-NEXT: 2 13.3 0.1 0.0 <total>
+
+# CHECK: [32] Code Region - Z sqdmlalb
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1503
+# CHECK-NEXT: Total uOps: 500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.33
+# CHECK-NEXT: IPC: 0.27
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeER . . . . . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D=====eeeeER . . . . . sqdmlalb z0.d, z1.s, z2.s
+# CHECK-NEXT: [0,2] D=======eeeeER . . . . . sqdmlalb z0.d, z1.s, z2.s
+# CHECK-NEXT: [0,3] D===========eeeeER . . . . sqdmlalb z0.d, z0.s, z1.s
+# CHECK-NEXT: [1,0] D===============eeeeeER . . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D====================eeeeER . . sqdmlalb z0.d, z1.s, z2.s
+# CHECK-NEXT: [1,2] D======================eeeeER . . sqdmlalb z0.d, z1.s, z2.s
+# CHECK-NEXT: [1,3] D==========================eeeeER sqdmlalb z0.d, z0.s, z1.s
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 8.5 0.5 0.0 mul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 13.5 0.0 0.0 sqdmlalb z0.d, z1.s, z2.s
+# CHECK-NEXT: 2. 2 15.5 0.0 0.0 sqdmlalb z0.d, z1.s, z2.s
+# CHECK-NEXT: 3. 2 19.5 0.0 0.0 sqdmlalb z0.d, z0.s, z1.s
+# CHECK-NEXT: 2 14.3 0.1 0.0 <total>
+
+# CHECK: [33] Code Region - Z sqrdmlah.b
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1503
+# CHECK-NEXT: Total uOps: 500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.33
+# CHECK-NEXT: IPC: 0.27
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeER . . . . . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D=====eeeeER . . . . . sqrdmlah z0.b, z1.b, z2.b
+# CHECK-NEXT: [0,2] D=======eeeeER . . . . . sqrdmlah z0.b, z1.b, z2.b
+# CHECK-NEXT: [0,3] D===========eeeeER . . . . sqrdmlah z0.b, z0.b, z1.b
+# CHECK-NEXT: [1,0] D===============eeeeeER . . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D====================eeeeER . . sqrdmlah z0.b, z1.b, z2.b
+# CHECK-NEXT: [1,2] D======================eeeeER . . sqrdmlah z0.b, z1.b, z2.b
+# CHECK-NEXT: [1,3] D==========================eeeeER sqrdmlah z0.b, z0.b, z1.b
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 8.5 0.5 0.0 mul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 13.5 0.0 0.0 sqrdmlah z0.b, z1.b, z2.b
+# CHECK-NEXT: 2. 2 15.5 0.0 0.0 sqrdmlah z0.b, z1.b, z2.b
+# CHECK-NEXT: 3. 2 19.5 0.0 0.0 sqrdmlah z0.b, z0.b, z1.b
+# CHECK-NEXT: 2 14.3 0.1 0.0 <total>
+
+# CHECK: [34] Code Region - Z sqrdmlah.d
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1803
+# CHECK-NEXT: Total uOps: 500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.28
+# CHECK-NEXT: IPC: 0.22
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345678
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeER . . . . . . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D=====eeeeeER . . . . . . sqrdmlah z0.d, z1.d, z2.d
+# CHECK-NEXT: [0,2] D========eeeeeER . . . . . sqrdmlah z0.d, z1.d, z2.d
+# CHECK-NEXT: [0,3] D=============eeeeeER . . . . sqrdmlah z0.d, z0.d, z1.d
+# CHECK-NEXT: [1,0] D==================eeeeeER . . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D=======================eeeeeER . . sqrdmlah z0.d, z1.d, z2.d
+# CHECK-NEXT: [1,2] D==========================eeeeeER . . sqrdmlah z0.d, z1.d, z2.d
+# CHECK-NEXT: [1,3] D===============================eeeeeER sqrdmlah z0.d, z0.d, z1.d
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 10.0 0.5 0.0 mul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 15.0 0.0 0.0 sqrdmlah z0.d, z1.d, z2.d
+# CHECK-NEXT: 2. 2 18.0 0.0 0.0 sqrdmlah z0.d, z1.d, z2.d
+# CHECK-NEXT: 3. 2 23.0 0.0 0.0 sqrdmlah z0.d, z0.d, z1.d
+# CHECK-NEXT: 2 16.5 0.1 0.0 <total>
+
+# CHECK: [35] Code Region - Z fcmla ZPmZZ
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1503
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.27
+# CHECK-NEXT: IPC: 0.27
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeER . . . . . . fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D===eeeeeER . . . . . fcmla z0.d, p0/m, z1.d, z2.d, #90
+# CHECK-NEXT: [0,2] D=====eeeeeER . . . . . fcmla z0.d, p0/m, z1.d, z2.d, #90
+# CHECK-NEXT: [0,3] D==========eeeeeER . . . . fcmla z0.d, p0/m, z0.d, z1.d, #90
+# CHECK-NEXT: [1,0] D===============eeeER . . . fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D==================eeeeeER . . fcmla z0.d, p0/m, z1.d, z2.d, #90
+# CHECK-NEXT: [1,2] D====================eeeeeER . . fcmla z0.d, p0/m, z1.d, z2.d, #90
+# CHECK-NEXT: [1,3] D=========================eeeeeER fcmla z0.d, p0/m, z0.d, z1.d, #90
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 8.5 0.5 0.0 fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 11.5 0.0 0.0 fcmla z0.d, p0/m, z1.d, z2.d, #90
+# CHECK-NEXT: 2. 2 13.5 0.0 0.0 fcmla z0.d, p0/m, z1.d, z2.d, #90
+# CHECK-NEXT: 3. 2 18.5 0.0 0.0 fcmla z0.d, p0/m, z0.d, z1.d, #90
+# CHECK-NEXT: 2 13.0 0.1 0.0 <total>
+
+# CHECK: [36] Code Region - Z fcmla ZZZI
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1503
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.27
+# CHECK-NEXT: IPC: 0.27
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeER . . . . . . fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D===eeeeeER . . . . . fcmla z0.s, z1.s, z2.s[1], #90
+# CHECK-NEXT: [0,2] D=====eeeeeER . . . . . fcmla z0.s, z1.s, z2.s[1], #90
+# CHECK-NEXT: [0,3] D==========eeeeeER . . . . fcmla z0.s, z0.s, z1.s[1], #90
+# CHECK-NEXT: [1,0] D===============eeeER . . . fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D==================eeeeeER . . fcmla z0.s, z1.s, z2.s[1], #90
+# CHECK-NEXT: [1,2] D====================eeeeeER . . fcmla z0.s, z1.s, z2.s[1], #90
+# CHECK-NEXT: [1,3] D=========================eeeeeER fcmla z0.s, z0.s, z1.s[1], #90
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 8.5 0.5 0.0 fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 11.5 0.0 0.0 fcmla z0.s, z1.s, z2.s[1], #90
+# CHECK-NEXT: 2. 2 13.5 0.0 0.0 fcmla z0.s, z1.s, z2.s[1], #90
+# CHECK-NEXT: 3. 2 18.5 0.0 0.0 fcmla z0.s, z0.s, z1.s[1], #90
+# CHECK-NEXT: 2 13.0 0.1 0.0 <total>
+
+# CHECK: [37] Code Region - Z fmla ZPmZZ
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1303
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.31
+# CHECK-NEXT: IPC: 0.31
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeeeER . . . . . fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D===eeeeER. . . . . fmla z0.d, p0/m, z1.d, z2.d
+# CHECK-NEXT: [0,2] D=====eeeeER . . . . fmla z0.d, p0/m, z1.d, z2.d
+# CHECK-NEXT: [0,3] D=========eeeeER . . . fmla z0.d, p0/m, z0.d, z1.d
+# CHECK-NEXT: [1,0] D=============eeeER . . . fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D================eeeeER . . fmla z0.d, p0/m, z1.d, z2.d
+# CHECK-NEXT: [1,2] D==================eeeeER. . fmla z0.d, p0/m, z1.d, z2.d
+# CHECK-NEXT: [1,3] D======================eeeeER fmla z0.d, p0/m, z0.d, z1.d
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 7.5 0.5 0.0 fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 10.5 0.0 0.0 fmla z0.d, p0/m, z1.d, z2.d
+# CHECK-NEXT: 2. 2 12.5 0.0 0.0 fmla z0.d, p0/m, z1.d, z2.d
+# CHECK-NEXT: 3. 2 16.5 0.0 0.0 fmla z0.d, p0/m, z0.d, z1.d
+# CHECK-NEXT: 2 11.8 0.1 0.0 <total>
+
+# CHECK: [38] Code Region - Z fmla ZZZI
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1303
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.31
+# CHECK-NEXT: IPC: 0.31
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeeeER . . . . . fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D===eeeeER. . . . . fmla z0.d, z1.d, z2.d[1]
+# CHECK-NEXT: [0,2] D=====eeeeER . . . . fmla z0.d, z1.d, z2.d[1]
+# CHECK-NEXT: [0,3] D=========eeeeER . . . fmla z0.d, z0.d, z1.d[1]
+# CHECK-NEXT: [1,0] D=============eeeER . . . fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D================eeeeER . . fmla z0.d, z1.d, z2.d[1]
+# CHECK-NEXT: [1,2] D==================eeeeER. . fmla z0.d, z1.d, z2.d[1]
+# CHECK-NEXT: [1,3] D======================eeeeER fmla z0.d, z0.d, z1.d[1]
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 7.5 0.5 0.0 fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 10.5 0.0 0.0 fmla z0.d, z1.d, z2.d[1]
+# CHECK-NEXT: 2. 2 12.5 0.0 0.0 fmla z0.d, z1.d, z2.d[1]
+# CHECK-NEXT: 3. 2 16.5 0.0 0.0 fmla z0.d, z0.d, z1.d[1]
+# CHECK-NEXT: 2 11.8 0.1 0.0 <total>
+
+# CHECK: [39] Code Region - Z fmlalb ZZZ
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1303
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.31
+# CHECK-NEXT: IPC: 0.31
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeeeER . . . . . fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D===eeeeER. . . . . fmlalb z0.s, z1.h, z2.h
+# CHECK-NEXT: [0,2] D=====eeeeER . . . . fmlalb z0.s, z1.h, z2.h
+# CHECK-NEXT: [0,3] D=========eeeeER . . . fmlalb z0.s, z0.h, z1.h
+# CHECK-NEXT: [1,0] D=============eeeER . . . fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D================eeeeER . . fmlalb z0.s, z1.h, z2.h
+# CHECK-NEXT: [1,2] D==================eeeeER. . fmlalb z0.s, z1.h, z2.h
+# CHECK-NEXT: [1,3] D======================eeeeER fmlalb z0.s, z0.h, z1.h
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 7.5 0.5 0.0 fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 10.5 0.0 0.0 fmlalb z0.s, z1.h, z2.h
+# CHECK-NEXT: 2. 2 12.5 0.0 0.0 fmlalb z0.s, z1.h, z2.h
+# CHECK-NEXT: 3. 2 16.5 0.0 0.0 fmlalb z0.s, z0.h, z1.h
+# CHECK-NEXT: 2 11.8 0.1 0.0 <total>
+
+# CHECK: [40] Code Region - Z bfdot
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1603
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.25
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 01234
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeER . . . . . . fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D===eeeeeER . . . . . bfdot z0.s, z1.h, z2.h
+# CHECK-NEXT: [0,2] D======eeeeeER . . . . . bfdot z0.s, z1.h, z2.h
+# CHECK-NEXT: [0,3] D===========eeeeeER . . . . bfdot z0.s, z0.h, z1.h
+# CHECK-NEXT: [1,0] D================eeeER . . . fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D===================eeeeeER . . bfdot z0.s, z1.h, z2.h
+# CHECK-NEXT: [1,2] D======================eeeeeER. . bfdot z0.s, z1.h, z2.h
+# CHECK-NEXT: [1,3] D===========================eeeeeER bfdot z0.s, z0.h, z1.h
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 9.0 0.5 0.0 fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 12.0 0.0 0.0 bfdot z0.s, z1.h, z2.h
+# CHECK-NEXT: 2. 2 15.0 0.0 0.0 bfdot z0.s, z1.h, z2.h
+# CHECK-NEXT: 3. 2 20.0 0.0 0.0 bfdot z0.s, z0.h, z1.h
+# CHECK-NEXT: 2 14.0 0.1 0.0 <total>
+
+# CHECK: [41] Code Region - Z bfmmla
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1903
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.21
+# CHECK-NEXT: IPC: 0.21
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0
+
+# CHECK: [0,0] DeeeER . . . . . . . fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D===eeeeeeER . . . . . . bfmmla z0.s, z1.h, z2.h
+# CHECK-NEXT: [0,2] D=======eeeeeeER . . . . . bfmmla z0.s, z1.h, z2.h
+# CHECK-NEXT: [0,3] D=============eeeeeeER . . . . bfmmla z0.s, z0.h, z1.h
+# CHECK-NEXT: [1,0] D===================eeeER. . . . fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D======================eeeeeeER . . bfmmla z0.s, z1.h, z2.h
+# CHECK-NEXT: [1,2] D==========================eeeeeeER. . bfmmla z0.s, z1.h, z2.h
+# CHECK-NEXT: [1,3] D================================eeeeeeER bfmmla z0.s, z0.h, z1.h
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 10.5 0.5 0.0 fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 13.5 0.0 0.0 bfmmla z0.s, z1.h, z2.h
+# CHECK-NEXT: 2. 2 17.5 0.0 0.0 bfmmla z0.s, z1.h, z2.h
+# CHECK-NEXT: 3. 2 23.5 0.0 0.0 bfmmla z0.s, z0.h, z1.h
+# CHECK-NEXT: 2 16.3 0.1 0.0 <total>
+
+# CHECK: [42] Code Region - bfmlalb
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1503
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.27
+# CHECK-NEXT: IPC: 0.27
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeER . . . . . . fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D===eeeeeER . . . . . bfmlalb z0.s, z1.h, z2.h
+# CHECK-NEXT: [0,2] D=====eeeeeER . . . . . bfmlalb z0.s, z1.h, z2.h
+# CHECK-NEXT: [0,3] D==========eeeeeER . . . . bfmlalb z0.s, z0.h, z1.h
+# CHECK-NEXT: [1,0] D===============eeeER . . . fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D==================eeeeeER . . bfmlalb z0.s, z1.h, z2.h
+# CHECK-NEXT: [1,2] D====================eeeeeER . . bfmlalb z0.s, z1.h, z2.h
+# CHECK-NEXT: [1,3] D=========================eeeeeER bfmlalb z0.s, z0.h, z1.h
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 8.5 0.5 0.0 fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 11.5 0.0 0.0 bfmlalb z0.s, z1.h, z2.h
+# CHECK-NEXT: 2. 2 13.5 0.0 0.0 bfmlalb z0.s, z1.h, z2.h
+# CHECK-NEXT: 3. 2 18.5 0.0 0.0 bfmlalb z0.s, z0.h, z1.h
+# CHECK-NEXT: 2 13.0 0.1 0.0 <total>
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-neon-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-neon-instructions.s
new file mode 100644
index 0000000..dc0d027
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-neon-instructions.s
@@ -0,0 +1,3731 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=neoverse-v3 -mattr=+aes,+sha3,+sm4 -instruction-tables < %s | FileCheck %s
+
+abs d29, d24
+abs v0.16b, v0.16b
+abs v0.2d, v0.2d
+abs v0.2s, v0.2s
+abs v0.4h, v0.4h
+abs v0.4s, v0.4s
+abs v0.8b, v0.8b
+abs v0.8h, v0.8h
+add d17, d31, d29
+add v0.8b, v0.8b, v0.8b
+addhn v0.2s, v0.2d, v0.2d
+addhn v0.4h, v0.4s, v0.4s
+addhn v0.8b, v0.8h, v0.8h
+addhn2 v0.16b, v0.8h, v0.8h
+addhn2 v0.4s, v0.2d, v0.2d
+addhn2 v0.8h, v0.4s, v0.4s
+addp v0.2d, v0.2d, v0.2d
+addp v0.8b, v0.8b, v0.8b
+addv s0, v0.4s
+addv h0, v0.4h
+addv h0, v0.8h
+addv b0, v0.8b
+addv b0, v0.16b
+aesd v0.16b, v0.16b
+aese v0.16b, v0.16b
+aesimc v0.16b, v0.16b
+aesmc v0.16b, v0.16b
+and v0.8b, v0.8b, v0.8b
+bfcvt h0, s0
+bfcvtn v0.4h, v0.4s
+bfcvtn2 v0.8h, v0.4s
+bfdot v0.2s, v0.4h, v0.4h
+bfdot v0.4s, v0.8h, v0.8h
+bfmlalb v0.4s, v0.8h, v0.8h
+bfmlalb v0.4s, v0.8h, v0.h[3]
+bfmlalt v0.4s, v0.8h, v0.8h
+bfmlalt v0.4s, v0.8h, v0.h[3]
+bfmmla v0.4s, v0.8h, v0.8h
+bic v0.4h, #15, lsl #8
+bic v0.8b, v0.8b, v0.8b
+bif v0.16b, v0.16b, v0.16b
+bit v0.16b, v0.16b, v0.16b
+bsl v0.8b, v0.8b, v0.8b
+cls v0.16b, v0.16b
+cls v0.2s, v0.2s
+cls v0.4h, v0.4h
+cls v0.4s, v0.4s
+cls v0.8b, v0.8b
+cls v0.8h, v0.8h
+clz v0.16b, v0.16b
+clz v0.2s, v0.2s
+clz v0.4h, v0.4h
+clz v0.4s, v0.4s
+clz v0.8b, v0.8b
+clz v0.8h, v0.8h
+cmeq d20, d21, 0
+cmeq d20, d21, d22
+cmeq v0.16b, v0.16b, 0
+cmeq v0.16b, v0.16b, v0.16b
+cmge d20, d21, 0
+cmge d20, d21, d22
+cmge v0.4h, v0.4h, v0.4h
+cmge v0.8b, v0.8b, 0
+cmgt d20, d21, 0
+cmgt d20, d21, d22
+cmgt v0.2s, v0.2s, 0
+cmgt v0.4s, v0.4s, v0.4s
+cmhi d20, d21, d22
+cmhi v0.8h, v0.8h, v0.8h
+cmhs d20, d21, d22
+cmhs v0.8b, v0.8b, v0.8b
+cmle d20, d21, 0
+cmle v0.2d, v0.2d, 0
+cmlt d20, d21, 0
+cmlt v0.8h, v0.8h, 0
+cmtst d20, d21, d22
+cmtst v0.2s, v0.2s, v0.2s
+cnt v0.16b, v0.16b
+cnt v0.8b, v0.8b
+dup v0.16b,w28
+dup v0.2d,x28
+dup v0.2s,w28
+dup v0.4h,w28
+dup v0.4s,w28
+dup v0.8b,w28
+dup v0.8h,w28
+dup b0, v0.b[1]
+dup d0, v0.d[1]
+dup h0, v0.h[1]
+dup s0, v0.s[1]
+dup v0.16b, v0.b[1]
+dup v0.2d, v0.d[1]
+dup v0.2s, v0.s[1]
+dup v0.4h, v0.h[1]
+dup v0.4s, v0.s[1]
+dup v0.8b, v0.b[1]
+dup v0.8h, v0.h[1]
+eor v0.16b, v0.16b, v0.16b
+ext v0.16b, v0.16b, v0.16b, #3
+ext v0.8b, v0.8b, v0.8b, #3
+fabd d29, d24, d20
+fabd s29, s24, s20
+fabd v0.4s, v0.4s, v0.4s
+fabs v0.2d, v0.2d
+fabs v0.2s, v0.2s
+fabs v0.4h, v0.4h
+fabs v0.4s, v0.4s
+fabs v0.8h, v0.8h
+facge d20, d21, d22
+facge s10, s11, s12
+facge v0.4s, v0.4s, v0.4s
+facgt d20, d21, d22
+facgt s10, s11, s12
+facgt v0.2d, v0.2d, v0.2d
+fadd v0.4s, v0.4s, v0.4s
+faddp v0.2s, v0.2s, v0.2s
+faddp v0.4s, v0.4s, v0.4s
+fcadd v0.2s, v0.2s, v0.2s, 90
+fcadd v0.4s, v0.4s, v0.4s, 270
+fcmeq d20, d21, #0.0
+fcmeq d20, d21, d22
+fcmeq s10, s11, #0.0
+fcmeq s10, s11, s12
+fcmeq v0.2s, v0.2s, #0.0
+fcmeq v0.2s, v0.2s, v0.2s
+fcmge d20, d21, #0.0
+fcmge d20, d21, d22
+fcmge s10, s11, #0.0
+fcmge s10, s11, s12
+fcmge v0.2d, v0.2d, #0.0
+fcmge v0.4s, v0.4s, v0.4s
+fcmgt d20, d21, #0.0
+fcmgt d20, d21, d22
+fcmgt s10, s11, #0.0
+fcmgt s10, s11, s12
+fcmgt v0.4s, v0.4s, #0.0
+fcmgt v0.4s, v0.4s, v0.4s
+fcmla v0.2s, v0.2s, v0.2s, #90
+fcmla v0.4s, v0.4s, v0.s[1], #0
+fcmle d20, d21, #0.0
+fcmle s10, s11, #0.0
+fcmle v0.2d, v0.2d, #0.0
+fcmlt d20, d21, #0.0
+fcmlt s10, s11, #0.0
+fcmlt v0.4s, v0.4s, #0.0
+fcvtas d21, d14
+fcvtas s12, s13
+fcvtas h12, h13
+fcvtas v0.2d, v0.2d
+fcvtas v0.2s, v0.2s
+fcvtas v0.4h, v0.4h
+fcvtas v0.4s, v0.4s
+fcvtas v0.8h, v0.8h
+fcvtau d21, d14
+fcvtau s12, s13
+fcvtau h12, h13
+fcvtau v0.2d, v0.2d
+fcvtau v0.2s, v0.2s
+fcvtau v0.4h, v0.4h
+fcvtau v0.4s, v0.4s
+fcvtau v0.8h, v0.8h
+fcvtl v0.2d, v0.2s
+fcvtl v0.4s, v0.4h
+fcvtl2 v0.2d, v0.4s
+fcvtl2 v0.4s, v0.8h
+fcvtms d21, d14
+fcvtms s22, s13
+fcvtms h22, h13
+fcvtms v0.2d, v0.2d
+fcvtms v0.2s, v0.2s
+fcvtms v0.4h, v0.4h
+fcvtms v0.4s, v0.4s
+fcvtms v0.8h, v0.8h
+fcvtmu d21, d14
+fcvtmu s12, s13
+fcvtmu h12, h13
+fcvtmu v0.2d, v0.2d
+fcvtmu v0.2s, v0.2s
+fcvtmu v0.4h, v0.4h
+fcvtmu v0.4s, v0.4s
+fcvtmu v0.8h, v0.8h
+fcvtn v0.2s, v0.2d
+fcvtn v0.4h, v0.4s
+fcvtn2 v0.4s, v0.2d
+fcvtn2 v0.8h, v0.4s
+fcvtns d21, d14
+fcvtns s22, s13
+fcvtns h22, h13
+fcvtns v0.2d, v0.2d
+fcvtns v0.2s, v0.2s
+fcvtns v0.4h, v0.4h
+fcvtns v0.4s, v0.4s
+fcvtns v0.8h, v0.8h
+fcvtnu d21, d14
+fcvtnu s12, s13
+fcvtnu h12, h13
+fcvtnu v0.2d, v0.2d
+fcvtnu v0.2s, v0.2s
+fcvtnu v0.4h, v0.4h
+fcvtnu v0.4s, v0.4s
+fcvtnu v0.8h, v0.8h
+fcvtps d21, d14
+fcvtps s22, s13
+fcvtps h22, h13
+fcvtps v0.2d, v0.2d
+fcvtps v0.2s, v0.2s
+fcvtps v0.4h, v0.4h
+fcvtps v0.4s, v0.4s
+fcvtps v0.8h, v0.8h
+fcvtpu d21, d14
+fcvtpu s12, s13
+fcvtpu h12, h13
+fcvtpu v0.2d, v0.2d
+fcvtpu v0.2s, v0.2s
+fcvtpu v0.4h, v0.4h
+fcvtpu v0.4s, v0.4s
+fcvtpu v0.8h, v0.8h
+fcvtxn s22, d13
+fcvtxn v0.2s, v0.2d
+fcvtxn2 v0.4s, v0.2d
+fcvtzs d21, d12, #1
+fcvtzs d21, d14
+fcvtzs s12, s13
+fcvtzs s21, s12, #1
+fcvtzs h21, h14
+fcvtzs h21, h12, #1
+fcvtzs v0.2d, v0.2d
+fcvtzs v0.2d, v0.2d, #3
+fcvtzs v0.2s, v0.2s
+fcvtzs v0.2s, v0.2s, #3
+fcvtzs v0.4h, v0.4h
+fcvtzs v0.4s, v0.4s
+fcvtzs v0.4s, v0.4s, #3
+fcvtzs v0.8h, v0.8h
+fcvtzu d21, d12, #1
+fcvtzu d21, d14
+fcvtzu s12, s13
+fcvtzu s21, s12, #1
+fcvtzu h12, h13
+fcvtzu h21, h12, #1
+fcvtzu v0.2d, v0.2d
+fcvtzu v0.2d, v0.2d, #3
+fcvtzu v0.2s, v0.2s
+fcvtzu v0.2s, v0.2s, #3
+fcvtzu v0.4h, v0.4h
+fcvtzu v0.4s, v0.4s
+fcvtzu v0.4s, v0.4s, #3
+fcvtzu v0.8h, v0.8h
+fdiv v0.2d, v0.2d, v0.2d
+fdiv v0.2s, v0.2s, v0.2s
+fdiv v0.4h, v0.4h, v0.4h
+fdiv v0.4s, v0.4s, v0.4s
+fdiv v0.8h, v0.8h, v0.8h
+fmax v0.2d, v0.2d, v0.2d
+fmax v0.2s, v0.2s, v0.2s
+fmax v0.4s, v0.4s, v0.4s
+fmaxnm v0.2d, v0.2d, v0.2d
+fmaxnm v0.2s, v0.2s, v0.2s
+fmaxnm v0.4s, v0.4s, v0.4s
+fmaxnmp v0.2d, v0.2d, v0.2d
+fmaxnmp v0.2s, v0.2s, v0.2s
+fmaxnmp v0.4s, v0.4s, v0.4s
+fmaxp v0.2d, v0.2d, v0.2d
+fmaxp v0.2s, v0.2s, v0.2s
+fmaxp v0.4s, v0.4s, v0.4s
+fmaxv h0, v0.4h
+fmaxv h0, v0.8h
+fmaxv s0, v0.4s
+fmin v0.2d, v0.2d, v0.2d
+fmin v0.2s, v0.2s, v0.2s
+fmin v0.4s, v0.4s, v0.4s
+fminnm v0.2d, v0.2d, v0.2d
+fminnm v0.2s, v0.2s, v0.2s
+fminnm v0.4s, v0.4s, v0.4s
+fminnmp v0.2d, v0.2d, v0.2d
+fminnmp v0.2s, v0.2s, v0.2s
+fminnmp v0.4s, v0.4s, v0.4s
+fminp v0.2d, v0.2d, v0.2d
+fminp v0.2s, v0.2s, v0.2s
+fminp v0.4s, v0.4s, v0.4s
+fmla d0, d1, v0.d[1]
+fmla s0, s1, v0.s[3]
+fmla v0.2s, v0.2s, v0.2s
+fmlal v0.2s, v0.2h, v0.h[1]
+fmlal v0.4s, v0.4h, v0.h[3]
+fmlal v0.2s, v0.2h, v0.2h
+fmlal v0.4s, v0.4h, v0.4h
+fmlal2 v0.2s, v0.2h, v0.h[1]
+fmlal2 v0.4s, v0.4h, v0.h[3]
+fmlal2 v0.2s, v0.2h, v0.2h
+fmlal2 v0.4s, v0.4h, v0.4h
+fmls d0, d4, v0.d[1]
+fmls s3, s5, v0.s[3]
+fmls v0.2s, v0.2s, v0.2s
+fmlsl v0.2s, v0.2h, v0.h[1]
+fmlsl v0.4s, v0.4h, v0.h[3]
+fmlsl v0.2s, v0.2h, v0.2h
+fmlsl v0.4s, v0.4h, v0.4h
+fmlsl2 v0.2s, v0.2h, v0.h[1]
+fmlsl2 v0.4s, v0.4h, v0.h[3]
+fmlsl2 v0.2s, v0.2h, v0.2h
+fmlsl2 v0.4s, v0.4h, v0.4h
+fmov v0.2d, #-1.25
+fmov v0.2s, #13.0
+fmov v0.4s, #1.0
+fmul d0, d1, v0.d[1]
+fmul s0, s1, v0.s[3]
+fmul v0.2s, v0.2s, v0.2s
+fmulx d0, d4, v0.d[1]
+fmulx d23, d11, d1
+fmulx s20, s22, s15
+fmulx s3, s5, v0.s[3]
+fmulx v0.2d, v0.2d, v0.2d
+fmulx v0.2s, v0.2s, v0.2s
+fmulx v0.4s, v0.4s, v0.4s
+fneg v0.2d, v0.2d
+fneg v0.2s, v0.2s
+fneg v0.4h, v0.4h
+fneg v0.4s, v0.4s
+fneg v0.8h, v0.8h
+frecpe d13, d13
+frecpe s19, s14
+frecpe v0.2d, v0.2d
+frecpe v0.2s, v0.2s
+frecpe v0.4h, v0.4h
+frecpe v0.4s, v0.4s
+frecpe v0.8h, v0.8h
+frecps v0.4s, v0.4s, v0.4s
+frecps d22, d30, d21
+frecps s21, s16, s13
+frecpx d16, d19
+frecpx s18, s10
+frint32x v0.2d, v0.2d
+frint32x v0.2s, v0.2s
+frint32x v0.4s, v0.4s
+frint32z v0.2d, v0.2d
+frint32z v0.2s, v0.2s
+frint32z v0.4s, v0.4s
+frint64x v0.2d, v0.2d
+frint64x v0.2s, v0.2s
+frint64x v0.4s, v0.4s
+frint64z v0.2d, v0.2d
+frint64z v0.2s, v0.2s
+frint64z v0.4s, v0.4s
+frinta v0.2d, v0.2d
+frinta v0.2s, v0.2s
+frinta v0.4h, v0.4h
+frinta v0.4s, v0.4s
+frinta v0.8h, v0.8h
+frinti v0.2d, v0.2d
+frinti v0.2s, v0.2s
+frinti v0.4h, v0.4h
+frinti v0.4s, v0.4s
+frinti v0.8h, v0.8h
+frintm v0.2d, v0.2d
+frintm v0.2s, v0.2s
+frintm v0.4h, v0.4h
+frintm v0.4s, v0.4s
+frintm v0.8h, v0.8h
+frintn v0.2d, v0.2d
+frintn v0.2s, v0.2s
+frintn v0.4h, v0.4h
+frintn v0.4s, v0.4s
+frintn v0.8h, v0.8h
+frintp v0.2d, v0.2d
+frintp v0.2s, v0.2s
+frintp v0.4h, v0.4h
+frintp v0.4s, v0.4s
+frintp v0.8h, v0.8h
+frintx v0.2d, v0.2d
+frintx v0.2s, v0.2s
+frintx v0.4h, v0.4h
+frintx v0.4s, v0.4s
+frintx v0.8h, v0.8h
+frintz v0.2d, v0.2d
+frintz v0.2s, v0.2s
+frintz v0.4h, v0.4h
+frintz v0.4s, v0.4s
+frintz v0.8h, v0.8h
+frsqrte d21, d12
+frsqrte s22, s13
+frsqrte v0.2d, v0.2d
+frsqrte v0.2s, v0.2s
+frsqrte v0.4h, v0.4h
+frsqrte v0.4s, v0.4s
+frsqrte v0.8h, v0.8h
+frsqrts d8, d22, d18
+frsqrts s21, s5, s12
+frsqrts v0.2d, v0.2d, v0.2d
+fsqrt v0.2d, v0.2d
+fsqrt v0.2s, v0.2s
+fsqrt v0.4h, v0.4h
+fsqrt v0.4s, v0.4s
+fsqrt v0.8h, v0.8h
+fsub v0.2s, v0.2s, v0.2s
+ld1 { v0.16b }, [x0]
+ld1 { v0.2d, v1.2d, v2.2d }, [x0], #48
+ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
+ld1 { v0.4s, v1.4s }, [sp], #32
+ld1 { v0.4s, v1.4s, v2.4s }, [sp]
+ld1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3
+ld1 { v0.8h }, [x15], x2
+ld1 { v0.8h, v1.8h }, [x15]
+ld1 { v0.b }[9], [x0]
+ld1 { v0.b }[9], [x0], #1
+ld1r { v0.16b }, [x0]
+ld1r { v0.16b }, [x0], #1
+ld1r { v0.8h }, [x15]
+ld1r { v0.8h }, [x15], #2
+ld2 { v0.16b, v1.16b }, [x0], x1
+ld2 { v0.8b, v1.8b }, [x0]
+ld2 { v0.h, v1.h }[7], [x15]
+ld2 { v0.h, v1.h }[7], [x15], #4
+ld2r { v0.2d, v1.2d }, [x0]
+ld2r { v0.2d, v1.2d }, [x0], #16
+ld2r { v0.4s, v1.4s }, [sp]
+ld2r { v0.4s, v1.4s }, [sp], #8
+ld3 { v0.4h, v1.4h, v2.4h }, [x15]
+ld3 { v0.8h, v1.8h, v2.8h }, [x15], x2
+ld3 { v0.s, v1.s, v2.s }[3], [sp]
+ld3 { v0.s, v1.s, v2.s }[3], [sp], x3
+ld3r { v0.4h, v1.4h, v2.4h }, [x15]
+ld3r { v0.4h, v1.4h, v2.4h }, [x15], #6
+ld3r { v0.8b, v1.8b, v2.8b }, [x0]
+ld3r { v0.8b, v1.8b, v2.8b }, [x0], #3
+ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
+ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64
+ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0]
+ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0], #32
+ld4 { v0.h, v1.h, v2.h, v3.h }[7], [x0], x0
+ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [sp]
+ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [sp], x7
+ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
+ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x30
+mla v0.8b, v0.8b, v0.8b
+mls v0.4h, v0.4h, v0.4h
+mov b0, v0.b[15]
+mov d6, v0.d[1]
+mov h2, v0.h[5]
+mov s17, v0.s[2]
+mov v2.b[0], v0.b[0]
+mov v2.h[1], v0.h[1]
+mov v2.s[2], v0.s[2]
+mov v2.d[1], v0.d[1]
+mov v0.b[0], w8
+mov v0.h[1], w8
+mov v0.s[2], w8
+mov v0.d[1], x8
+mov v0.16b, v0.16b
+mov v0.8b, v0.8b
+movi d15, #0xff00ff00ff00ff
+movi v0.16b, #31
+movi v0.2d, #0xff0000ff0000ffff
+movi v0.2s, #8, msl #8
+movi v0.4s, #255, lsl #24
+movi v0.8b, #255
+mul v0.8b, v0.8b, v0.8b
+mvni v0.2s, 0
+mvni v0.4s, #16, msl #16
+neg d29, d24
+neg v0.16b, v0.16b
+neg v0.2d, v0.2d
+neg v0.2s, v0.2s
+neg v0.4h, v0.4h
+neg v0.4s, v0.4s
+neg v0.8b, v0.8b
+neg v0.8h, v0.8h
+not v0.16b, v0.16b
+not v0.8b, v0.8b
+orn v0.16b, v0.16b, v0.16b
+orr v0.16b, v0.16b, v0.16b
+orr v0.8h, #31
+pmul v0.16b, v0.16b, v0.16b
+pmul v0.8b, v0.8b, v0.8b
+pmull v0.8h, v0.8b, v0.8b
+pmull2 v0.8h, v0.16b, v0.16b
+raddhn v0.2s, v0.2d, v0.2d
+raddhn v0.4h, v0.4s, v0.4s
+raddhn v0.8b, v0.8h, v0.8h
+raddhn2 v0.16b, v0.8h, v0.8h
+raddhn2 v0.4s, v0.2d, v0.2d
+raddhn2 v0.8h, v0.4s, v0.4s
+rbit v0.16b, v0.16b
+rbit v0.8b, v0.8b
+rev16 v21.8b, v1.8b
+rev16 v30.16b, v31.16b
+rev32 v0.4h, v9.4h
+rev32 v21.8b, v1.8b
+rev32 v30.16b, v31.16b
+rev32 v4.8h, v7.8h
+rev64 v0.16b, v31.16b
+rev64 v1.8b, v9.8b
+rev64 v13.4h, v21.4h
+rev64 v2.8h, v4.8h
+rev64 v4.2s, v0.2s
+rev64 v6.4s, v8.4s
+rshrn v0.2s, v0.2d, #3
+rshrn v0.4h, v0.4s, #3
+rshrn v0.8b, v0.8h, #3
+rshrn2 v0.16b, v0.8h, #3
+rshrn2 v0.4s, v0.2d, #3
+rshrn2 v0.8h, v0.4s, #3
+rsubhn v0.2s, v0.2d, v0.2d
+rsubhn v0.4h, v0.4s, v0.4s
+rsubhn v0.8b, v0.8h, v0.8h
+rsubhn2 v0.16b, v0.8h, v0.8h
+rsubhn2 v0.4s, v0.2d, v0.2d
+rsubhn2 v0.8h, v0.4s, v0.4s
+saba v0.16b, v0.16b, v0.16b
+sabal v0.2d, v0.2s, v0.2s
+sabal v0.4s, v0.4h, v0.4h
+sabal v0.8h, v0.8b, v0.8b
+sabal2 v0.2d, v0.4s, v0.4s
+sabal2 v0.4s, v0.8h, v0.8h
+sabal2 v0.8h, v0.16b, v0.16b
+sabd v0.4h, v0.4h, v0.4h
+sabdl v0.2d, v0.2s, v0.2s
+sabdl v0.4s, v0.4h, v0.4h
+sabdl v0.8h, v0.8b, v0.8b
+sabdl2 v0.2d, v0.4s, v0.4s
+sabdl2 v0.4s, v0.8h, v0.8h
+sabdl2 v0.8h, v0.16b, v0.16b
+sadalp v0.1d, v0.2s
+sadalp v0.2d, v0.4s
+sadalp v0.2s, v0.4h
+sadalp v0.4h, v0.8b
+sadalp v0.4s, v0.8h
+sadalp v0.8h, v0.16b
+saddl v0.2d, v0.2s, v0.2s
+saddl v0.4s, v0.4h, v0.4h
+saddl v0.8h, v0.8b, v0.8b
+saddl2 v0.2d, v0.4s, v0.4s
+saddl2 v0.4s, v0.8h, v0.8h
+saddl2 v0.8h, v0.16b, v0.16b
+saddlp v0.1d, v0.2s
+saddlp v0.2d, v0.4s
+saddlp v0.2s, v0.4h
+saddlp v0.4h, v0.8b
+saddlp v0.4s, v0.8h
+saddlp v0.8h, v0.16b
+saddlv d0, v0.4s
+saddlv s0, v0.4h
+saddlv s0, v0.8h
+saddlv h0, v0.8b
+saddlv h0, v0.16b
+saddw v0.2d, v0.2d, v0.2s
+saddw v0.4s, v0.4s, v0.4h
+saddw v0.8h, v0.8h, v0.8b
+saddw2 v0.2d, v0.2d, v0.4s
+saddw2 v0.4s, v0.4s, v0.8h
+saddw2 v0.8h, v0.8h, v0.16b
+scvtf d21, d12
+scvtf d21, d12, #64
+scvtf s22, s13
+scvtf s22, s13, #32
+scvtf v0.2d, v0.2d
+scvtf v0.2d, v0.2d, #3
+scvtf v0.2s, v0.2s
+scvtf v0.2s, v0.2s, #3
+scvtf v0.4h, v0.4h
+scvtf v0.4s, v0.4s
+scvtf v0.4s, v0.4s, #3
+scvtf v0.8h, v0.8h
+sdot v0.2s, v0.8b, v0.4b[2]
+sdot v0.2s, v0.8b, v0.8b
+sdot v0.4s, v0.16b, v0.16b
+sdot v0.4s, v0.16b, v0.4b[2]
+shadd v0.8b, v0.8b, v0.8b
+shl d7, d10, #12
+shl v0.16b, v0.16b, #3
+shl v0.2d, v0.2d, #3
+shl v0.4h, v0.4h, #3
+shl v0.4s, v0.4s, #3
+shll v0.2d, v0.2s, #32
+shll v0.4s, v0.4h, #16
+shll v0.8h, v0.8b, #8
+shll v0.2d, v0.2s, #32
+shll v0.4s, v0.4h, #16
+shll v0.8h, v0.8b, #8
+shll2 v0.2d, v0.4s, #32
+shll2 v0.4s, v0.8h, #16
+shll2 v0.8h, v0.16b, #8
+shll2 v0.2d, v0.4s, #32
+shll2 v0.4s, v0.8h, #16
+shll2 v0.8h, v0.16b, #8
+shrn v0.2s, v0.2d, #3
+shrn v0.4h, v0.4s, #3
+shrn v0.8b, v0.8h, #3
+shrn2 v0.16b, v0.8h, #3
+shrn2 v0.4s, v0.2d, #3
+shrn2 v0.8h, v0.4s, #3
+shsub v0.2s, v0.2s, v0.2s
+shsub v0.4h, v0.4h, v0.4h
+sli d10, d14, #12
+sli v0.16b, v0.16b, #3
+sli v0.2d, v0.2d, #3
+sli v0.2s, v0.2s, #3
+sli v0.4h, v0.4h, #3
+sli v0.4s, v0.4s, #3
+sli v0.8b, v0.8b, #3
+sli v0.8h, v0.8h, #3
+smax v0.2s, v0.2s, v0.2s
+smax v0.4h, v0.4h, v0.4h
+smax v0.8b, v0.8b, v0.8b
+smaxp v0.2s, v0.2s, v0.2s
+smaxp v0.4h, v0.4h, v0.4h
+smaxp v0.8b, v0.8b, v0.8b
+smaxv b0, v0.8b
+smaxv b0, v0.16b
+smaxv h0, v0.4h
+smaxv h0, v0.8h
+smaxv s0, v0.4s
+smin v0.16b, v0.16b, v0.16b
+smin v0.4s, v0.4s, v0.4s
+smin v0.8h, v0.8h, v0.8h
+sminp v0.16b, v0.16b, v0.16b
+sminp v0.4s, v0.4s, v0.4s
+sminp v0.8h, v0.8h, v0.8h
+sminv b0, v0.8b
+sminv b0, v0.16b
+sminv h0, v0.4h
+sminv h0, v0.8h
+sminv s0, v0.4s
+smlal v0.2d, v0.2s, v0.2s
+smlal v0.4s, v0.4h, v0.4h
+smlal v0.8h, v0.8b, v0.8b
+smlal2 v0.2d, v0.4s, v0.4s
+smlal2 v0.4s, v0.8h, v0.8h
+smlal2 v0.8h, v0.16b, v0.16b
+smlsl v0.2d, v0.2s, v0.2s
+smlsl v0.4s, v0.4h, v0.4h
+smlsl v0.8h, v0.8b, v0.8b
+smlsl2 v0.2d, v0.4s, v0.4s
+smlsl2 v0.4s, v0.8h, v0.8h
+smlsl2 v0.8h, v0.16b, v0.16b
+smmla v0.4s, v0.16b, v0.16b
+smull v0.2d, v0.2s, v0.2s
+smull v0.4s, v0.4h, v0.4h
+smull v0.8h, v0.8b, v0.8b
+smull2 v0.2d, v0.4s, v0.4s
+smull2 v0.4s, v0.8h, v0.8h
+smull2 v0.8h, v0.16b, v0.16b
+sqabs b19, b14
+sqabs d18, d12
+sqabs h21, h15
+sqabs s20, s12
+sqabs v0.16b, v0.16b
+sqabs v0.2d, v0.2d
+sqabs v0.2s, v0.2s
+sqabs v0.4h, v0.4h
+sqabs v0.4s, v0.4s
+sqabs v0.8b, v0.8b
+sqabs v0.8h, v0.8h
+sqadd b20, b11, b15
+sqadd v0.16b, v0.16b, v0.16b
+sqadd v0.2s, v0.2s, v0.2s
+sqdmlal d19, s24, s12
+sqdmlal d8, s9, v0.s[1]
+sqdmlal s0, h0, v0.h[3]
+sqdmlal s17, h27, h12
+sqdmlal v0.2d, v0.2s, v0.2s
+sqdmlal v0.4s, v0.4h, v0.4h
+sqdmlal2 v0.2d, v0.4s, v0.4s
+sqdmlal2 v0.4s, v0.8h, v0.8h
+sqdmlsl d12, s23, s13
+sqdmlsl d8, s9, v0.s[1]
+sqdmlsl s0, h0, v0.h[3]
+sqdmlsl s14, h12, h25
+sqdmlsl v0.2d, v0.2s, v0.2s
+sqdmlsl v0.4s, v0.4h, v0.4h
+sqdmlsl2 v0.2d, v0.4s, v0.4s
+sqdmlsl2 v0.4s, v0.8h, v0.8h
+sqdmulh h10, h11, h12
+sqdmulh h7, h15, v0.h[3]
+sqdmulh s15, s14, v0.s[1]
+sqdmulh s20, s21, s2
+sqdmulh v0.2s, v0.2s, v0.2s
+sqdmulh v0.4s, v0.4s, v0.4s
+sqdmull d1, s1, v0.s[1]
+sqdmull d15, s22, s12
+sqdmull s1, h1, v0.h[3]
+sqdmull s12, h22, h12
+sqdmull v0.2d, v0.2s, v0.2s
+sqdmull v0.4s, v0.4h, v0.4h
+sqdmull2 v0.2d, v0.4s, v0.4s
+sqdmull2 v0.4s, v0.8h, v0.8h
+sqneg b19, b14
+sqneg d18, d12
+sqneg h21, h15
+sqneg s20, s12
+sqneg v0.16b, v0.16b
+sqneg v0.2d, v0.2d
+sqneg v0.2s, v0.2s
+sqneg v0.4h, v0.4h
+sqneg v0.4s, v0.4s
+sqneg v0.8b, v0.8b
+sqneg v0.8h, v0.8h
+sqrdmlah h0, h1, v2.h[3]
+sqrdmlah v0.4h, v1.4h, v2.h[3]
+sqrdmlah v0.8h, v1.8h, v2.h[3]
+sqrdmlah s0, s1, v2.s[1]
+sqrdmlah v0.2s, v1.2s, v2.s[1]
+sqrdmlah v0.4s, v1.4s, v2.s[1]
+sqrdmlah h0, h1, h2
+sqrdmlah v0.4h, v1.4h, v2.4h
+sqrdmlah v0.8h, v1.8h, v2.8h
+sqrdmlah s0, s1, s2
+sqrdmlah v0.2s, v1.2s, v2.2s
+sqrdmlah v0.4s, v1.4s, v2.4s
+sqrdmlsh h0, h1, v2.h[3]
+sqrdmlsh v0.4h, v1.4h, v2.h[3]
+sqrdmlsh v0.8h, v1.8h, v2.h[3]
+sqrdmlsh s0, s1, v2.s[1]
+sqrdmlsh v0.2s, v1.2s, v2.s[1]
+sqrdmlsh v0.4s, v1.4s, v2.s[1]
+sqrdmlsh h0, h1, h2
+sqrdmlsh v0.4h, v1.4h, v2.4h
+sqrdmlsh v0.8h, v1.8h, v2.8h
+sqrdmlsh s0, s1, s2
+sqrdmlsh v0.2s, v1.2s, v2.2s
+sqrdmlsh v0.4s, v1.4s, v2.4s
+sqrdmulh h10, h11, h12
+sqrdmulh h7, h15, v0.h[3]
+sqrdmulh s15, s14, v0.s[1]
+sqrdmulh s20, s21, s2
+sqrdmulh v0.4h, v0.4h, v0.4h
+sqrdmulh v0.8h, v0.8h, v0.8h
+sqrshl d31, d31, d31
+sqrshl h3, h4, h15
+sqrshl v0.2s, v0.2s, v0.2s
+sqrshl v0.4h, v0.4h, v0.4h
+sqrshl v0.8b, v0.8b, v0.8b
+sqrshrn b10, h13, #2
+sqrshrn h15, s10, #6
+sqrshrn s15, d12, #9
+sqrshrn v0.2s, v0.2d, #3
+sqrshrn v0.4h, v0.4s, #3
+sqrshrn v0.8b, v0.8h, #3
+sqrshrn2 v0.16b, v0.8h, #3
+sqrshrn2 v0.4s, v0.2d, #3
+sqrshrn2 v0.8h, v0.4s, #3
+sqrshrun b17, h10, #6
+sqrshrun h10, s13, #15
+sqrshrun s22, d16, #31
+sqrshrun v0.2s, v0.2d, #3
+sqrshrun v0.4h, v0.4s, #3
+sqrshrun v0.8b, v0.8h, #3
+sqrshrun2 v0.16b, v0.8h, #3
+sqrshrun2 v0.4s, v0.2d, #3
+sqrshrun2 v0.8h, v0.4s, #3
+sqshl b11, b19, #7
+sqshl d15, d16, #51
+sqshl d31, d31, d31
+sqshl h13, h18, #11
+sqshl h3, h4, h15
+sqshl s14, s17, #22
+sqshl v0.16b, v0.16b, #3
+sqshl v0.2d, v0.2d, #3
+sqshl v0.2s, v0.2s, #3
+sqshl v0.2s, v0.2s, v0.2s
+sqshl v0.4h, v0.4h, #3
+sqshl v0.4h, v0.4h, v0.4h
+sqshl v0.4s, v0.4s, #3
+sqshl v0.8b, v0.8b, #3
+sqshl v0.8b, v0.8b, v0.8b
+sqshl v0.8h, v0.8h, #3
+sqshlu b15, b18, #6
+sqshlu d11, d13, #32
+sqshlu h19, h17, #6
+sqshlu s16, s14, #25
+sqshlu v0.16b, v0.16b, #3
+sqshlu v0.2d, v0.2d, #3
+sqshlu v0.2s, v0.2s, #3
+sqshlu v0.4h, v0.4h, #3
+sqshlu v0.4s, v0.4s, #3
+sqshlu v0.8b, v0.8b, #3
+sqshlu v0.8h, v0.8h, #3
+sqshrn b10, h15, #5
+sqshrn h17, s10, #4
+sqshrn s18, d10, #31
+sqshrn v0.2s, v0.2d, #3
+sqshrn v0.4h, v0.4s, #3
+sqshrn v0.8b, v0.8h, #3
+sqshrn2 v0.16b, v0.8h, #3
+sqshrn2 v0.4s, v0.2d, #3
+sqshrn2 v0.8h, v0.4s, #3
+sqshrun b15, h10, #7
+sqshrun h20, s14, #3
+sqshrun s10, d15, #15
+sqshrun v0.2s, v0.2d, #3
+sqshrun v0.4h, v0.4s, #3
+sqshrun v0.8b, v0.8h, #3
+sqshrun2 v0.16b, v0.8h, #3
+sqshrun2 v0.4s, v0.2d, #3
+sqshrun2 v0.8h, v0.4s, #3
+sqsub s20, s10, s7
+sqsub v0.2d, v0.2d, v0.2d
+sqsub v0.4s, v0.4s, v0.4s
+sqsub v0.8b, v0.8b, v0.8b
+sqxtn b18, h18
+sqxtn h20, s17
+sqxtn s19, d14
+sqxtn v0.2s, v0.2d
+sqxtn v0.4h, v0.4s
+sqxtn v0.8b, v0.8h
+sqxtn2 v0.16b, v0.8h
+sqxtn2 v0.4s, v0.2d
+sqxtn2 v0.8h, v0.4s
+sqxtun b19, h14
+sqxtun h21, s15
+sqxtun s20, d12
+sqxtun v0.2s, v0.2d
+sqxtun v0.4h, v0.4s
+sqxtun v0.8b, v0.8h
+sqxtun2 v0.16b, v0.8h
+sqxtun2 v0.4s, v0.2d
+sqxtun2 v0.8h, v0.4s
+srhadd v0.2s, v0.2s, v0.2s
+srhadd v0.4h, v0.4h, v0.4h
+srhadd v0.8b, v0.8b, v0.8b
+sri d10, d12, #14
+sri v0.16b, v0.16b, #3
+sri v0.2d, v0.2d, #3
+sri v0.2s, v0.2s, #3
+sri v0.4h, v0.4h, #3
+sri v0.4s, v0.4s, #3
+sri v0.8b, v0.8b, #3
+sri v0.8h, v0.8h, #3
+srshl d16, d16, d16
+srshl v0.2s, v0.2s, v0.2s
+srshl v0.4h, v0.4h, v0.4h
+srshl v0.8b, v0.8b, v0.8b
+srshr d19, d18, #7
+srshr v0.16b, v0.16b, #3
+srshr v0.2d, v0.2d, #3
+srshr v0.2s, v0.2s, #3
+srshr v0.4h, v0.4h, #3
+srshr v0.4s, v0.4s, #3
+srshr v0.8b, v0.8b, #3
+srshr v0.8h, v0.8h, #3
+srsra d15, d11, #19
+srsra v0.16b, v0.16b, #3
+srsra v0.2d, v0.2d, #3
+srsra v0.2s, v0.2s, #3
+srsra v0.4h, v0.4h, #3
+srsra v0.4s, v0.4s, #3
+srsra v0.8b, v0.8b, #3
+srsra v0.8h, v0.8h, #3
+sshl d31, d31, d31
+sshl v0.2d, v0.2d, v0.2d
+sshl v0.2s, v0.2s, v0.2s
+sshl v0.4h, v0.4h, v0.4h
+sshl v0.8b, v0.8b, v0.8b
+sshll v0.2d, v0.2s, #3
+sshll2 v0.4s, v0.8h, #3
+sshr d15, d16, #12
+sshr v0.16b, v0.16b, #3
+sshr v0.2d, v0.2d, #3
+sshr v0.2s, v0.2s, #3
+sshr v0.4h, v0.4h, #3
+sshr v0.4s, v0.4s, #3
+sshr v0.8b, v0.8b, #3
+sshr v0.8h, v0.8h, #3
+ssra d18, d12, #21
+ssra v0.16b, v0.16b, #3
+ssra v0.2d, v0.2d, #3
+ssra v0.2s, v0.2s, #3
+ssra v0.4h, v0.4h, #3
+ssra v0.4s, v0.4s, #3
+ssra v0.8b, v0.8b, #3
+ssra v0.8h, v0.8h, #3
+ssubl v0.2d, v0.2s, v0.2s
+ssubl v0.4s, v0.4h, v0.4h
+ssubl v0.8h, v0.8b, v0.8b
+ssubl2 v0.2d, v0.4s, v0.4s
+ssubl2 v0.4s, v0.8h, v0.8h
+ssubl2 v0.8h, v0.16b, v0.16b
+ssubw v0.2d, v0.2d, v0.2s
+ssubw v0.4s, v0.4s, v0.4h
+ssubw v0.8h, v0.8h, v0.8b
+ssubw2 v0.2d, v0.2d, v0.4s
+ssubw2 v0.4s, v0.4s, v0.8h
+ssubw2 v0.8h, v0.8h, v0.16b
+st1 { v0.16b }, [x0]
+st1 { v0.2d, v1.2d, v2.2d }, [x0], #48
+st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
+st1 { v0.4s, v1.4s }, [sp], #32
+st1 { v0.4s, v1.4s, v2.4s }, [sp]
+st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3
+st1 { v0.8h }, [x15], x2
+st1 { v0.8h, v1.8h }, [x15]
+st1 { v0.d }[1], [x0]
+st1 { v0.d }[1], [x0], #8
+st2 { v0.16b, v1.16b }, [x0], x1
+st2 { v0.8b, v1.8b }, [x0]
+st2 { v0.s, v1.s }[3], [sp]
+st2 { v0.s, v1.s }[3], [sp], #8
+st3 { v0.4h, v1.4h, v2.4h }, [x15]
+st3 { v0.8h, v1.8h, v2.8h }, [x15], x2
+st3 { v0.h, v1.h, v2.h }[7], [x15]
+st3 { v0.h, v1.h, v2.h }[7], [x15], #6
+st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
+st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64
+st4 { v0.b, v1.b, v2.b, v3.b }[9], [x0]
+st4 { v0.d, v1.d, v2.d, v3.d }[1], [x0], x5
+sub d15, d5, d16
+sub v0.2d, v0.2d, v0.2d
+sudot v0.2s, v0.8b, v0.4b[2]
+sudot v0.4s, v0.16b, v0.4b[2]
+suqadd b19, b14
+suqadd d18, d22
+suqadd h20, h15
+suqadd s21, s12
+suqadd v0.16b, v0.16b
+suqadd v0.2d, v0.2d
+suqadd v0.2s, v0.2s
+suqadd v0.4h, v0.4h
+suqadd v0.4s, v0.4s
+suqadd v0.8b, v0.8b
+suqadd v0.8h, v0.8h
+tbl v0.16b, { v0.16b }, v0.16b
+tbl v0.16b, { v0.16b, v1.16b }, v0.16b
+tbl v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b
+tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b
+tbl v0.8b, { v0.16b }, v0.8b
+tbl v0.8b, { v0.16b, v1.16b }, v0.8b
+tbl v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b
+tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b
+tbx v0.16b, { v0.16b }, v0.16b
+tbx v0.16b, { v0.16b, v1.16b }, v0.16b
+tbx v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b
+tbx v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b
+tbx v0.8b, { v0.16b }, v0.8b
+tbx v0.8b, { v0.16b, v1.16b }, v0.8b
+tbx v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b
+tbx v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b
+trn1 v0.16b, v0.16b, v0.16b
+trn1 v0.2d, v0.2d, v0.2d
+trn1 v0.2s, v0.2s, v0.2s
+trn1 v0.4h, v0.4h, v0.4h
+trn1 v0.4s, v0.4s, v0.4s
+trn1 v0.8b, v0.8b, v0.8b
+trn1 v0.8h, v0.8h, v0.8h
+trn2 v0.16b, v0.16b, v0.16b
+trn2 v0.2d, v0.2d, v0.2d
+trn2 v0.2s, v0.2s, v0.2s
+trn2 v0.4h, v0.4h, v0.4h
+trn2 v0.4s, v0.4s, v0.4s
+trn2 v0.8b, v0.8b, v0.8b
+trn2 v0.8h, v0.8h, v0.8h
+uaba v0.8b, v0.8b, v0.8b
+uabal v0.2d, v0.2s, v0.2s
+uabal v0.4s, v0.4h, v0.4h
+uabal v0.8h, v0.8b, v0.8b
+uabal2 v0.2d, v0.4s, v0.4s
+uabal2 v0.4s, v0.8h, v0.8h
+uabal2 v0.8h, v0.16b, v0.16b
+uabd v0.4h, v0.4h, v0.4h
+uabdl v0.2d, v0.2s, v0.2s
+uabdl v0.4s, v0.4h, v0.4h
+uabdl v0.8h, v0.8b, v0.8b
+uabdl2 v0.2d, v0.4s, v0.4s
+uabdl2 v0.4s, v0.8h, v0.8h
+uabdl2 v0.8h, v0.16b, v0.16b
+uadalp v0.1d, v0.2s
+uadalp v0.2d, v0.4s
+uadalp v0.2s, v0.4h
+uadalp v0.4h, v0.8b
+uadalp v0.4s, v0.8h
+uadalp v0.8h, v0.16b
+uaddl v0.2d, v0.2s, v0.2s
+uaddl v0.4s, v0.4h, v0.4h
+uaddl v0.8h, v0.8b, v0.8b
+uaddl2 v0.2d, v0.4s, v0.4s
+uaddl2 v0.4s, v0.8h, v0.8h
+uaddl2 v0.8h, v0.16b, v0.16b
+uaddlp v0.1d, v0.2s
+uaddlp v0.2d, v0.4s
+uaddlp v0.2s, v0.4h
+uaddlp v0.4h, v0.8b
+uaddlp v0.4s, v0.8h
+uaddlp v0.8h, v0.16b
+uaddlv d0, v0.4s
+uaddlv s0, v0.4h
+uaddlv s0, v0.8h
+uaddlv h0, v0.8b
+uaddlv h0, v0.16b
+uaddw v0.2d, v0.2d, v0.2s
+uaddw v0.4s, v0.4s, v0.4h
+uaddw v0.8h, v0.8h, v0.8b
+uaddw2 v0.2d, v0.2d, v0.4s
+uaddw2 v0.4s, v0.4s, v0.8h
+uaddw2 v0.8h, v0.8h, v0.16b
+ucvtf d21, d14
+ucvtf d21, d14, #64
+ucvtf s22, s13
+ucvtf s22, s13, #32
+ucvtf v0.2d, v0.2d
+ucvtf v0.2d, v0.2d, #3
+ucvtf v0.2s, v0.2s
+ucvtf v0.2s, v0.2s, #3
+ucvtf v0.4h, v0.4h
+ucvtf v0.4s, v0.4s
+ucvtf v0.4s, v0.4s, #3
+ucvtf v0.8h, v0.8h
+udot v0.2s, v0.8b, v0.4b[2]
+udot v0.2s, v0.8b, v0.8b
+udot v0.4s, v0.16b, v0.16b
+udot v0.4s, v0.16b, v0.4b[2]
+uhadd v0.16b, v0.16b, v0.16b
+uhadd v0.8h, v0.8h, v0.8h
+uhsub v0.4s, v0.4s, v0.4s
+umax v0.16b, v0.16b, v0.16b
+umax v0.4s, v0.4s, v0.4s
+umax v0.8h, v0.8h, v0.8h
+umaxp v0.16b, v0.16b, v0.16b
+umaxp v0.4s, v0.4s, v0.4s
+umaxp v0.8h, v0.8h, v0.8h
+umaxv b0, v0.8b
+umaxv b0, v0.16b
+umaxv h0, v0.4h
+umaxv h0, v0.8h
+umaxv s0, v0.4s
+umin v0.2s, v0.2s, v0.2s
+umin v0.4h, v0.4h, v0.4h
+umin v0.8b, v0.8b, v0.8b
+uminp v0.2s, v0.2s, v0.2s
+uminp v0.4h, v0.4h, v0.4h
+uminp v0.8b, v0.8b, v0.8b
+uminv b0, v0.8b
+uminv b0, v0.16b
+uminv h0, v0.4h
+uminv h0, v0.8h
+uminv s0, v0.4s
+umlal v0.2d, v0.2s, v0.2s
+umlal v0.4s, v0.4h, v0.4h
+umlal v0.8h, v0.8b, v0.8b
+umlal2 v0.2d, v0.4s, v0.4s
+umlal2 v0.4s, v0.8h, v0.8h
+umlal2 v0.8h, v0.16b, v0.16b
+umlsl v0.2d, v0.2s, v0.2s
+umlsl v0.4s, v0.4h, v0.4h
+umlsl v0.8h, v0.8b, v0.8b
+umlsl2 v0.2d, v0.4s, v0.4s
+umlsl2 v0.4s, v0.8h, v0.8h
+umlsl2 v0.8h, v0.16b, v0.16b
+ummla v0.4s, v0.16b, v0.16b
+umov w0, v0.b[1]
+umov w0, v0.h[1]
+umov w0, v0.s[1]
+umov x0, v0.d[1]
+umull v0.2d, v0.2s, v0.2s
+umull v0.4s, v0.4h, v0.4h
+umull v0.8h, v0.8b, v0.8b
+umull2 v0.2d, v0.4s, v0.4s
+umull2 v0.4s, v0.8h, v0.8h
+umull2 v0.8h, v0.16b, v0.16b
+uqadd h0, h1, h5
+uqadd v0.8h, v0.8h, v0.8h
+uqrshl b11, b20, b30
+uqrshl s23, s20, s16
+uqrshl v0.16b, v0.16b, v0.16b
+uqrshl v0.4s, v0.4s, v0.4s
+uqrshl v0.4s, v0.4s, v0.4s
+uqrshl v0.8h, v0.8h, v0.8h
+uqrshrn b10, h12, #5
+uqrshrn h12, s10, #14
+uqrshrn s10, d10, #25
+uqrshrn v0.2s, v0.2d, #3
+uqrshrn v0.4h, v0.4s, #3
+uqrshrn v0.8b, v0.8h, #3
+uqrshrn2 v0.16b, v0.8h, #3
+uqrshrn2 v0.4s, v0.2d, #3
+uqrshrn2 v0.8h, v0.4s, #3
+uqshl b11, b20, b30
+uqshl b18, b15, #6
+uqshl d15, d12, #19
+uqshl h11, h18, #7
+uqshl s14, s19, #18
+uqshl s23, s20, s16
+uqshl v0.16b, v0.16b, #3
+uqshl v0.16b, v0.16b, v0.16b
+uqshl v0.2d, v0.2d, #3
+uqshl v0.2d, v0.2d, v0.2d
+uqshl v0.2s, v0.2s, #3
+uqshl v0.4h, v0.4h, #3
+uqshl v0.4s, v0.4s, #3
+uqshl v0.4s, v0.4s, v0.4s
+uqshl v0.8b, v0.8b, #3
+uqshl v0.8h, v0.8h, #3
+uqshl v0.8h, v0.8h, v0.8h
+uqshrn b12, h10, #7
+uqshrn h10, s14, #5
+uqshrn s10, d12, #13
+uqshrn v0.2s, v0.2d, #3
+uqshrn v0.4h, v0.4s, #3
+uqshrn v0.8b, v0.8h, #3
+uqshrn2 v0.16b, v0.8h, #3
+uqshrn2 v0.4s, v0.2d, #3
+uqshrn2 v0.8h, v0.4s, #3
+uqsub d16, d16, d16
+uqsub v0.4h, v0.4h, v0.4h
+uqxtn b18, h18
+uqxtn h20, s17
+uqxtn s19, d14
+uqxtn v0.2s, v0.2d
+uqxtn v0.4h, v0.4s
+uqxtn v0.8b, v0.8h
+uqxtn2 v0.16b, v0.8h
+uqxtn2 v0.4s, v0.2d
+uqxtn2 v0.8h, v0.4s
+urecpe v0.2s, v0.2s
+urecpe v0.4s, v0.4s
+urhadd v0.16b, v0.16b, v0.16b
+urhadd v0.4s, v0.4s, v0.4s
+urhadd v0.8h, v0.8h, v0.8h
+urshl d8, d7, d4
+urshl v0.16b, v0.16b, v0.16b
+urshl v0.2d, v0.2d, v0.2d
+urshl v0.4s, v0.4s, v0.4s
+urshl v0.8h, v0.8h, v0.8h
+urshr d20, d23, #31
+urshr v0.16b, v0.16b, #3
+urshr v0.2d, v0.2d, #3
+urshr v0.2s, v0.2s, #3
+urshr v0.4h, v0.4h, #3
+urshr v0.4s, v0.4s, #3
+urshr v0.8b, v0.8b, #3
+urshr v0.8h, v0.8h, #3
+ursqrte v0.2s, v0.2s
+ursqrte v0.4s, v0.4s
+ursra d18, d10, #13
+ursra v0.16b, v0.16b, #3
+ursra v0.2d, v0.2d, #3
+ursra v0.2s, v0.2s, #3
+ursra v0.4h, v0.4h, #3
+ursra v0.4s, v0.4s, #3
+ursra v0.8b, v0.8b, #3
+ursra v0.8h, v0.8h, #3
+usdot v0.2s, v0.8b, v0.4b[2]
+usdot v0.2s, v0.8b, v0.8b
+usdot v0.4s, v0.16b, v0.16b
+usdot v0.4s, v0.16b, v0.4b[2]
+ushl d0, d0, d0
+ushl v0.16b, v0.16b, v0.16b
+ushl v0.4s, v0.4s, v0.4s
+ushl v0.8h, v0.8h, v0.8h
+ushll v0.4s, v0.4h, #3
+ushll2 v0.8h, v0.16b, #3
+ushr d10, d17, #18
+ushr v0.16b, v0.16b, #3
+ushr v0.2d, v0.2d, #3
+ushr v0.2s, v0.2s, #3
+ushr v0.4h, v0.4h, #3
+ushr v0.4s, v0.4s, #3
+ushr v0.8b, v0.8b, #3
+ushr v0.8h, v0.8h, #3
+usmmla v0.4s, v0.16b, v0.16b
+smov w0, v0.b[1]
+smov w0, v0.h[1]
+smov x0, v0.b[1]
+smov x0, v0.h[1]
+smov x0, v0.s[1]
+usqadd b19, b14
+usqadd d18, d22
+usqadd h20, h15
+usqadd s21, s12
+usqadd v0.16b, v0.16b
+usqadd v0.2d, v0.2d
+usqadd v0.2s, v0.2s
+usqadd v0.4h, v0.4h
+usqadd v0.4s, v0.4s
+usqadd v0.8b, v0.8b
+usqadd v0.8h, v0.8h
+usra d20, d13, #61
+usra v0.16b, v0.16b, #3
+usra v0.2d, v0.2d, #3
+usra v0.2s, v0.2s, #3
+usra v0.4h, v0.4h, #3
+usra v0.4s, v0.4s, #3
+usra v0.8b, v0.8b, #3
+usra v0.8h, v0.8h, #3
+usubl v0.2d, v0.2s, v0.2s
+usubl v0.4s, v0.4h, v0.4h
+usubl v0.8h, v0.8b, v0.8b
+usubl2 v0.2d, v0.4s, v0.4s
+usubl2 v0.4s, v0.8h, v0.8h
+usubl2 v0.8h, v0.16b, v0.16b
+usubw v0.2d, v0.2d, v0.2s
+usubw v0.4s, v0.4s, v0.4h
+usubw v0.8h, v0.8h, v0.8b
+usubw2 v0.2d, v0.2d, v0.4s
+usubw2 v0.4s, v0.4s, v0.8h
+usubw2 v0.8h, v0.8h, v0.16b
+uzp1 v0.16b, v0.16b, v0.16b
+uzp1 v0.2d, v0.2d, v0.2d
+uzp1 v0.2s, v0.2s, v0.2s
+uzp1 v0.4h, v0.4h, v0.4h
+uzp1 v0.4s, v0.4s, v0.4s
+uzp1 v0.8b, v0.8b, v0.8b
+uzp1 v0.8h, v0.8h, v0.8h
+uzp2 v0.16b, v0.16b, v0.16b
+uzp2 v0.2d, v0.2d, v0.2d
+uzp2 v0.2s, v0.2s, v0.2s
+uzp2 v0.4h, v0.4h, v0.4h
+uzp2 v0.4s, v0.4s, v0.4s
+uzp2 v0.8b, v0.8b, v0.8b
+uzp2 v0.8h, v0.8h, v0.8h
+xtn v0.2s, v0.2d
+xtn v0.4h, v0.4s
+xtn v0.8b, v0.8h
+xtn2 v0.16b, v0.8h
+xtn2 v0.4s, v0.2d
+xtn2 v0.8h, v0.4s
+zip1 v0.16b, v0.16b, v0.16b
+zip1 v0.2d, v0.2d, v0.2d
+zip1 v0.2s, v0.2s, v0.2s
+zip1 v0.4h, v0.4h, v0.4h
+zip1 v0.4s, v0.4s, v0.4s
+zip1 v0.8b, v0.8b, v0.8b
+zip1 v0.8h, v0.8h, v0.8h
+zip2 v0.16b, v0.16b, v0.16b
+zip2 v0.2d, v0.2d, v0.2d
+zip2 v0.2s, v0.2s, v0.2s
+zip2 v0.4h, v0.4h, v0.4h
+zip2 v0.4s, v0.4s, v0.4s
+zip2 v0.8b, v0.8b, v0.8b
+zip2 v0.8h, v0.8h, v0.8h
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 2 0.25 abs d29, d24
+# CHECK-NEXT: 1 2 0.25 abs v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 abs v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.25 abs v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.25 abs v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.25 abs v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 abs v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.25 abs v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.25 add d17, d31, d29
+# CHECK-NEXT: 1 2 0.25 add v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.25 addhn v0.2s, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.25 addhn v0.4h, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 addhn v0.8b, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.25 addhn2 v0.16b, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.25 addhn2 v0.4s, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.25 addhn2 v0.8h, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 addp v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.25 addp v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 3 0.50 addv s0, v0.4s
+# CHECK-NEXT: 1 3 0.50 addv h0, v0.4h
+# CHECK-NEXT: 2 5 0.50 addv h0, v0.8h
+# CHECK-NEXT: 2 5 0.50 addv b0, v0.8b
+# CHECK-NEXT: 2 6 1.00 addv b0, v0.16b
+# CHECK-NEXT: 1 2 0.25 aesd v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 aese v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 aesimc v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 aesmc v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 and v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 3 0.50 bfcvt h0, s0
+# CHECK-NEXT: 2 4 1.00 bfcvtn v0.4h, v0.4s
+# CHECK-NEXT: 2 4 1.00 bfcvtn2 v0.8h, v0.4s
+# CHECK-NEXT: 1 5 0.25 bfdot v0.2s, v0.4h, v0.4h
+# CHECK-NEXT: 1 5 0.25 bfdot v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: 1 5 0.25 bfmlalb v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: 1 5 0.25 bfmlalb v0.4s, v0.8h, v0.h[3]
+# CHECK-NEXT: 1 5 0.25 bfmlalt v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: 1 5 0.25 bfmlalt v0.4s, v0.8h, v0.h[3]
+# CHECK-NEXT: 1 6 0.25 bfmmla v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.25 bic v0.4h, #15, lsl #8
+# CHECK-NEXT: 1 2 0.25 bic v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.25 bif v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 bit v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 bsl v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.25 cls v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 cls v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.25 cls v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.25 cls v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 cls v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.25 cls v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.25 clz v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 clz v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.25 clz v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.25 clz v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 clz v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.25 clz v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.25 cmeq d20, d21, #0
+# CHECK-NEXT: 1 2 0.25 cmeq d20, d21, d22
+# CHECK-NEXT: 1 2 0.25 cmeq v0.16b, v0.16b, #0
+# CHECK-NEXT: 1 2 0.25 cmeq v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 cmge d20, d21, #0
+# CHECK-NEXT: 1 2 0.25 cmge d20, d21, d22
+# CHECK-NEXT: 1 2 0.25 cmge v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.25 cmge v0.8b, v0.8b, #0
+# CHECK-NEXT: 1 2 0.25 cmgt d20, d21, #0
+# CHECK-NEXT: 1 2 0.25 cmgt d20, d21, d22
+# CHECK-NEXT: 1 2 0.25 cmgt v0.2s, v0.2s, #0
+# CHECK-NEXT: 1 2 0.25 cmgt v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 cmhi d20, d21, d22
+# CHECK-NEXT: 1 2 0.25 cmhi v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.25 cmhs d20, d21, d22
+# CHECK-NEXT: 1 2 0.25 cmhs v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.25 cmle d20, d21, #0
+# CHECK-NEXT: 1 2 0.25 cmle v0.2d, v0.2d, #0
+# CHECK-NEXT: 1 2 0.25 cmlt d20, d21, #0
+# CHECK-NEXT: 1 2 0.25 cmlt v0.8h, v0.8h, #0
+# CHECK-NEXT: 1 2 0.25 cmtst d20, d21, d22
+# CHECK-NEXT: 1 2 0.25 cmtst v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.25 cnt v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 cnt v0.8b, v0.8b
+# CHECK-NEXT: 1 3 1.00 dup v0.16b, w28
+# CHECK-NEXT: 1 3 1.00 dup v0.2d, x28
+# CHECK-NEXT: 1 3 1.00 dup v0.2s, w28
+# CHECK-NEXT: 1 3 1.00 dup v0.4h, w28
+# CHECK-NEXT: 1 3 1.00 dup v0.4s, w28
+# CHECK-NEXT: 1 3 1.00 dup v0.8b, w28
+# CHECK-NEXT: 1 3 1.00 dup v0.8h, w28
+# CHECK-NEXT: 1 2 0.25 mov b0, v0.b[1]
+# CHECK-NEXT: 1 2 0.25 mov d0, v0.d[1]
+# CHECK-NEXT: 1 2 0.25 mov h0, v0.h[1]
+# CHECK-NEXT: 1 2 0.25 mov s0, v0.s[1]
+# CHECK-NEXT: 1 2 0.25 dup v0.16b, v0.b[1]
+# CHECK-NEXT: 1 2 0.25 dup v0.2d, v0.d[1]
+# CHECK-NEXT: 1 2 0.25 dup v0.2s, v0.s[1]
+# CHECK-NEXT: 1 2 0.25 dup v0.4h, v0.h[1]
+# CHECK-NEXT: 1 2 0.25 dup v0.4s, v0.s[1]
+# CHECK-NEXT: 1 2 0.25 dup v0.8b, v0.b[1]
+# CHECK-NEXT: 1 2 0.25 dup v0.8h, v0.h[1]
+# CHECK-NEXT: 1 2 0.25 eor v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 ext v0.16b, v0.16b, v0.16b, #3
+# CHECK-NEXT: 1 2 0.25 ext v0.8b, v0.8b, v0.8b, #3
+# CHECK-NEXT: 1 2 0.25 fabd d29, d24, d20
+# CHECK-NEXT: 1 2 0.25 fabd s29, s24, s20
+# CHECK-NEXT: 1 2 0.25 fabd v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 fabs v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.25 fabs v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.25 fabs v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.25 fabs v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 fabs v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.25 facge d20, d21, d22
+# CHECK-NEXT: 1 2 0.25 facge s10, s11, s12
+# CHECK-NEXT: 1 2 0.25 facge v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 facgt d20, d21, d22
+# CHECK-NEXT: 1 2 0.25 facgt s10, s11, s12
+# CHECK-NEXT: 1 2 0.25 facgt v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.25 fadd v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 faddp v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.25 faddp v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 fcadd v0.2s, v0.2s, v0.2s, #90
+# CHECK-NEXT: 1 2 0.25 fcadd v0.4s, v0.4s, v0.4s, #270
+# CHECK-NEXT: 1 2 0.25 fcmeq d20, d21, #0.0
+# CHECK-NEXT: 1 2 0.25 fcmeq d20, d21, d22
+# CHECK-NEXT: 1 2 0.25 fcmeq s10, s11, #0.0
+# CHECK-NEXT: 1 2 0.25 fcmeq s10, s11, s12
+# CHECK-NEXT: 1 2 0.25 fcmeq v0.2s, v0.2s, #0.0
+# CHECK-NEXT: 1 2 0.25 fcmeq v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.25 fcmge d20, d21, #0.0
+# CHECK-NEXT: 1 2 0.25 fcmge d20, d21, d22
+# CHECK-NEXT: 1 2 0.25 fcmge s10, s11, #0.0
+# CHECK-NEXT: 1 2 0.25 fcmge s10, s11, s12
+# CHECK-NEXT: 1 2 0.25 fcmge v0.2d, v0.2d, #0.0
+# CHECK-NEXT: 1 2 0.25 fcmge v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 fcmgt d20, d21, #0.0
+# CHECK-NEXT: 1 2 0.25 fcmgt d20, d21, d22
+# CHECK-NEXT: 1 2 0.25 fcmgt s10, s11, #0.0
+# CHECK-NEXT: 1 2 0.25 fcmgt s10, s11, s12
+# CHECK-NEXT: 1 2 0.25 fcmgt v0.4s, v0.4s, #0.0
+# CHECK-NEXT: 1 2 0.25 fcmgt v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 4 0.25 fcmla v0.2s, v0.2s, v0.2s, #90
+# CHECK-NEXT: 1 4 0.25 fcmla v0.4s, v0.4s, v0.s[1], #0
+# CHECK-NEXT: 1 2 0.25 fcmle d20, d21, #0.0
+# CHECK-NEXT: 1 2 0.25 fcmle s10, s11, #0.0
+# CHECK-NEXT: 1 2 0.25 fcmle v0.2d, v0.2d, #0.0
+# CHECK-NEXT: 1 2 0.25 fcmlt d20, d21, #0.0
+# CHECK-NEXT: 1 2 0.25 fcmlt s10, s11, #0.0
+# CHECK-NEXT: 1 2 0.25 fcmlt v0.4s, v0.4s, #0.0
+# CHECK-NEXT: 1 3 0.50 fcvtas d21, d14
+# CHECK-NEXT: 2 4 1.00 fcvtas s12, s13
+# CHECK-NEXT: 4 6 2.00 fcvtas h12, h13
+# CHECK-NEXT: 1 3 0.50 fcvtas v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.50 fcvtas v0.2s, v0.2s
+# CHECK-NEXT: 2 4 1.00 fcvtas v0.4h, v0.4h
+# CHECK-NEXT: 2 4 1.00 fcvtas v0.4s, v0.4s
+# CHECK-NEXT: 4 6 2.00 fcvtas v0.8h, v0.8h
+# CHECK-NEXT: 1 3 0.50 fcvtau d21, d14
+# CHECK-NEXT: 2 4 1.00 fcvtau s12, s13
+# CHECK-NEXT: 4 6 2.00 fcvtau h12, h13
+# CHECK-NEXT: 1 3 0.50 fcvtau v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.50 fcvtau v0.2s, v0.2s
+# CHECK-NEXT: 2 4 1.00 fcvtau v0.4h, v0.4h
+# CHECK-NEXT: 2 4 1.00 fcvtau v0.4s, v0.4s
+# CHECK-NEXT: 4 6 2.00 fcvtau v0.8h, v0.8h
+# CHECK-NEXT: 1 3 0.50 fcvtl v0.2d, v0.2s
+# CHECK-NEXT: 2 4 1.00 fcvtl v0.4s, v0.4h
+# CHECK-NEXT: 1 3 0.50 fcvtl2 v0.2d, v0.4s
+# CHECK-NEXT: 2 4 1.00 fcvtl2 v0.4s, v0.8h
+# CHECK-NEXT: 1 3 0.50 fcvtms d21, d14
+# CHECK-NEXT: 2 4 1.00 fcvtms s22, s13
+# CHECK-NEXT: 4 6 2.00 fcvtms h22, h13
+# CHECK-NEXT: 1 3 0.50 fcvtms v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.50 fcvtms v0.2s, v0.2s
+# CHECK-NEXT: 2 4 1.00 fcvtms v0.4h, v0.4h
+# CHECK-NEXT: 2 4 1.00 fcvtms v0.4s, v0.4s
+# CHECK-NEXT: 4 6 2.00 fcvtms v0.8h, v0.8h
+# CHECK-NEXT: 1 3 0.50 fcvtmu d21, d14
+# CHECK-NEXT: 2 4 1.00 fcvtmu s12, s13
+# CHECK-NEXT: 4 6 2.00 fcvtmu h12, h13
+# CHECK-NEXT: 1 3 0.50 fcvtmu v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.50 fcvtmu v0.2s, v0.2s
+# CHECK-NEXT: 2 4 1.00 fcvtmu v0.4h, v0.4h
+# CHECK-NEXT: 2 4 1.00 fcvtmu v0.4s, v0.4s
+# CHECK-NEXT: 4 6 2.00 fcvtmu v0.8h, v0.8h
+# CHECK-NEXT: 1 3 0.50 fcvtn v0.2s, v0.2d
+# CHECK-NEXT: 2 4 1.00 fcvtn v0.4h, v0.4s
+# CHECK-NEXT: 1 3 0.50 fcvtn2 v0.4s, v0.2d
+# CHECK-NEXT: 2 4 1.00 fcvtn2 v0.8h, v0.4s
+# CHECK-NEXT: 1 3 0.50 fcvtns d21, d14
+# CHECK-NEXT: 2 4 1.00 fcvtns s22, s13
+# CHECK-NEXT: 4 6 2.00 fcvtns h22, h13
+# CHECK-NEXT: 1 3 0.50 fcvtns v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.50 fcvtns v0.2s, v0.2s
+# CHECK-NEXT: 2 4 1.00 fcvtns v0.4h, v0.4h
+# CHECK-NEXT: 2 4 1.00 fcvtns v0.4s, v0.4s
+# CHECK-NEXT: 4 6 2.00 fcvtns v0.8h, v0.8h
+# CHECK-NEXT: 1 3 0.50 fcvtnu d21, d14
+# CHECK-NEXT: 2 4 1.00 fcvtnu s12, s13
+# CHECK-NEXT: 4 6 2.00 fcvtnu h12, h13
+# CHECK-NEXT: 1 3 0.50 fcvtnu v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.50 fcvtnu v0.2s, v0.2s
+# CHECK-NEXT: 2 4 1.00 fcvtnu v0.4h, v0.4h
+# CHECK-NEXT: 2 4 1.00 fcvtnu v0.4s, v0.4s
+# CHECK-NEXT: 4 6 2.00 fcvtnu v0.8h, v0.8h
+# CHECK-NEXT: 1 3 0.50 fcvtps d21, d14
+# CHECK-NEXT: 2 4 1.00 fcvtps s22, s13
+# CHECK-NEXT: 4 6 2.00 fcvtps h22, h13
+# CHECK-NEXT: 1 3 0.50 fcvtps v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.50 fcvtps v0.2s, v0.2s
+# CHECK-NEXT: 2 4 1.00 fcvtps v0.4h, v0.4h
+# CHECK-NEXT: 2 4 1.00 fcvtps v0.4s, v0.4s
+# CHECK-NEXT: 4 6 2.00 fcvtps v0.8h, v0.8h
+# CHECK-NEXT: 1 3 0.50 fcvtpu d21, d14
+# CHECK-NEXT: 2 4 1.00 fcvtpu s12, s13
+# CHECK-NEXT: 4 6 2.00 fcvtpu h12, h13
+# CHECK-NEXT: 1 3 0.50 fcvtpu v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.50 fcvtpu v0.2s, v0.2s
+# CHECK-NEXT: 2 4 1.00 fcvtpu v0.4h, v0.4h
+# CHECK-NEXT: 2 4 1.00 fcvtpu v0.4s, v0.4s
+# CHECK-NEXT: 4 6 2.00 fcvtpu v0.8h, v0.8h
+# CHECK-NEXT: 1 3 0.50 fcvtxn s22, d13
+# CHECK-NEXT: 1 3 0.50 fcvtxn v0.2s, v0.2d
+# CHECK-NEXT: 1 3 0.50 fcvtxn2 v0.4s, v0.2d
+# CHECK-NEXT: 1 3 0.50 fcvtzs d21, d12, #1
+# CHECK-NEXT: 1 3 0.50 fcvtzs d21, d14
+# CHECK-NEXT: 2 4 1.00 fcvtzs s12, s13
+# CHECK-NEXT: 2 4 1.00 fcvtzs s21, s12, #1
+# CHECK-NEXT: 4 6 2.00 fcvtzs h21, h14
+# CHECK-NEXT: 4 6 2.00 fcvtzs h21, h12, #1
+# CHECK-NEXT: 1 3 0.50 fcvtzs v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.50 fcvtzs v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 3 0.50 fcvtzs v0.2s, v0.2s
+# CHECK-NEXT: 1 3 0.50 fcvtzs v0.2s, v0.2s, #3
+# CHECK-NEXT: 2 4 1.00 fcvtzs v0.4h, v0.4h
+# CHECK-NEXT: 2 4 1.00 fcvtzs v0.4s, v0.4s
+# CHECK-NEXT: 2 4 1.00 fcvtzs v0.4s, v0.4s, #3
+# CHECK-NEXT: 4 6 2.00 fcvtzs v0.8h, v0.8h
+# CHECK-NEXT: 1 3 0.50 fcvtzu d21, d12, #1
+# CHECK-NEXT: 1 3 0.50 fcvtzu d21, d14
+# CHECK-NEXT: 2 4 1.00 fcvtzu s12, s13
+# CHECK-NEXT: 2 4 1.00 fcvtzu s21, s12, #1
+# CHECK-NEXT: 4 6 2.00 fcvtzu h12, h13
+# CHECK-NEXT: 4 6 2.00 fcvtzu h21, h12, #1
+# CHECK-NEXT: 1 3 0.50 fcvtzu v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.50 fcvtzu v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 3 0.50 fcvtzu v0.2s, v0.2s
+# CHECK-NEXT: 1 3 0.50 fcvtzu v0.2s, v0.2s, #3
+# CHECK-NEXT: 2 4 1.00 fcvtzu v0.4h, v0.4h
+# CHECK-NEXT: 2 4 1.00 fcvtzu v0.4s, v0.4s
+# CHECK-NEXT: 2 4 1.00 fcvtzu v0.4s, v0.4s, #3
+# CHECK-NEXT: 4 6 2.00 fcvtzu v0.8h, v0.8h
+# CHECK-NEXT: 1 14 2.00 fdiv v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 9 2.00 fdiv v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 9 4.00 fdiv v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 11 4.00 fdiv v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 13 8.00 fdiv v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.25 fmax v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.25 fmax v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.25 fmax v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 fmaxnm v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.25 fmaxnm v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.25 fmaxnm v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 fmaxnmp v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.25 fmaxnmp v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.25 fmaxnmp v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 fmaxp v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.25 fmaxp v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.25 fmaxp v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 2 4 0.50 fmaxv h0, v0.4h
+# CHECK-NEXT: 3 6 0.75 fmaxv h0, v0.8h
+# CHECK-NEXT: 2 4 0.50 fmaxv s0, v0.4s
+# CHECK-NEXT: 1 2 0.25 fmin v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.25 fmin v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.25 fmin v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 fminnm v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.25 fminnm v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.25 fminnm v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 fminnmp v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.25 fminnmp v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.25 fminnmp v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 fminp v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.25 fminp v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.25 fminp v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 4 0.25 fmla d0, d1, v0.d[1]
+# CHECK-NEXT: 1 4 0.25 fmla s0, s1, v0.s[3]
+# CHECK-NEXT: 1 4 0.25 fmla v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 4 0.25 fmlal v0.2s, v0.2h, v0.h[1]
+# CHECK-NEXT: 1 4 0.25 fmlal v0.4s, v0.4h, v0.h[3]
+# CHECK-NEXT: 1 4 0.25 fmlal v0.2s, v0.2h, v0.2h
+# CHECK-NEXT: 1 4 0.25 fmlal v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: 1 4 0.25 fmlal2 v0.2s, v0.2h, v0.h[1]
+# CHECK-NEXT: 1 4 0.25 fmlal2 v0.4s, v0.4h, v0.h[3]
+# CHECK-NEXT: 1 4 0.25 fmlal2 v0.2s, v0.2h, v0.2h
+# CHECK-NEXT: 1 4 0.25 fmlal2 v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: 1 4 0.25 fmls d0, d4, v0.d[1]
+# CHECK-NEXT: 1 4 0.25 fmls s3, s5, v0.s[3]
+# CHECK-NEXT: 1 4 0.25 fmls v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 4 0.25 fmlsl v0.2s, v0.2h, v0.h[1]
+# CHECK-NEXT: 1 4 0.25 fmlsl v0.4s, v0.4h, v0.h[3]
+# CHECK-NEXT: 1 4 0.25 fmlsl v0.2s, v0.2h, v0.2h
+# CHECK-NEXT: 1 4 0.25 fmlsl v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: 1 4 0.25 fmlsl2 v0.2s, v0.2h, v0.h[1]
+# CHECK-NEXT: 1 4 0.25 fmlsl2 v0.4s, v0.4h, v0.h[3]
+# CHECK-NEXT: 1 4 0.25 fmlsl2 v0.2s, v0.2h, v0.2h
+# CHECK-NEXT: 1 4 0.25 fmlsl2 v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.25 fmov v0.2d, #-1.25000000
+# CHECK-NEXT: 1 2 0.25 fmov v0.2s, #13.00000000
+# CHECK-NEXT: 1 2 0.25 fmov v0.4s, #1.00000000
+# CHECK-NEXT: 1 3 0.25 fmul d0, d1, v0.d[1]
+# CHECK-NEXT: 1 3 0.25 fmul s0, s1, v0.s[3]
+# CHECK-NEXT: 1 3 0.25 fmul v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 3 0.25 fmulx d0, d4, v0.d[1]
+# CHECK-NEXT: 1 2 0.25 fmulx d23, d11, d1
+# CHECK-NEXT: 1 2 0.25 fmulx s20, s22, s15
+# CHECK-NEXT: 1 3 0.25 fmulx s3, s5, v0.s[3]
+# CHECK-NEXT: 1 3 0.25 fmulx v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.25 fmulx v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 3 0.25 fmulx v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 fneg v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.25 fneg v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.25 fneg v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.25 fneg v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 fneg v0.8h, v0.8h
+# CHECK-NEXT: 1 3 0.50 frecpe d13, d13
+# CHECK-NEXT: 1 3 0.50 frecpe s19, s14
+# CHECK-NEXT: 1 2 0.25 frecpe v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.50 frecpe v0.2s, v0.2s
+# CHECK-NEXT: 2 4 1.00 frecpe v0.4h, v0.4h
+# CHECK-NEXT: 2 4 1.00 frecpe v0.4s, v0.4s
+# CHECK-NEXT: 4 6 2.00 frecpe v0.8h, v0.8h
+# CHECK-NEXT: 1 4 0.25 frecps v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 4 0.25 frecps d22, d30, d21
+# CHECK-NEXT: 1 4 0.25 frecps s21, s16, s13
+# CHECK-NEXT: 1 3 0.50 frecpx d16, d19
+# CHECK-NEXT: 1 3 0.50 frecpx s18, s10
+# CHECK-NEXT: 1 3 0.50 frint32x v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.50 frint32x v0.2s, v0.2s
+# CHECK-NEXT: 2 4 1.00 frint32x v0.4s, v0.4s
+# CHECK-NEXT: 1 3 0.50 frint32z v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.50 frint32z v0.2s, v0.2s
+# CHECK-NEXT: 2 4 1.00 frint32z v0.4s, v0.4s
+# CHECK-NEXT: 1 3 0.50 frint64x v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.50 frint64x v0.2s, v0.2s
+# CHECK-NEXT: 2 4 1.00 frint64x v0.4s, v0.4s
+# CHECK-NEXT: 1 3 0.50 frint64z v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.50 frint64z v0.2s, v0.2s
+# CHECK-NEXT: 2 4 1.00 frint64z v0.4s, v0.4s
+# CHECK-NEXT: 1 3 0.50 frinta v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.50 frinta v0.2s, v0.2s
+# CHECK-NEXT: 2 4 1.00 frinta v0.4h, v0.4h
+# CHECK-NEXT: 2 4 1.00 frinta v0.4s, v0.4s
+# CHECK-NEXT: 4 6 2.00 frinta v0.8h, v0.8h
+# CHECK-NEXT: 1 3 0.50 frinti v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.50 frinti v0.2s, v0.2s
+# CHECK-NEXT: 2 4 1.00 frinti v0.4h, v0.4h
+# CHECK-NEXT: 2 4 1.00 frinti v0.4s, v0.4s
+# CHECK-NEXT: 4 6 2.00 frinti v0.8h, v0.8h
+# CHECK-NEXT: 1 3 0.50 frintm v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.50 frintm v0.2s, v0.2s
+# CHECK-NEXT: 2 4 1.00 frintm v0.4h, v0.4h
+# CHECK-NEXT: 2 4 1.00 frintm v0.4s, v0.4s
+# CHECK-NEXT: 4 6 2.00 frintm v0.8h, v0.8h
+# CHECK-NEXT: 1 3 0.50 frintn v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.50 frintn v0.2s, v0.2s
+# CHECK-NEXT: 2 4 1.00 frintn v0.4h, v0.4h
+# CHECK-NEXT: 2 4 1.00 frintn v0.4s, v0.4s
+# CHECK-NEXT: 4 6 2.00 frintn v0.8h, v0.8h
+# CHECK-NEXT: 1 3 0.50 frintp v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.50 frintp v0.2s, v0.2s
+# CHECK-NEXT: 2 4 1.00 frintp v0.4h, v0.4h
+# CHECK-NEXT: 2 4 1.00 frintp v0.4s, v0.4s
+# CHECK-NEXT: 4 6 2.00 frintp v0.8h, v0.8h
+# CHECK-NEXT: 1 3 0.50 frintx v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.50 frintx v0.2s, v0.2s
+# CHECK-NEXT: 2 4 1.00 frintx v0.4h, v0.4h
+# CHECK-NEXT: 2 4 1.00 frintx v0.4s, v0.4s
+# CHECK-NEXT: 4 6 2.00 frintx v0.8h, v0.8h
+# CHECK-NEXT: 1 3 0.50 frintz v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.50 frintz v0.2s, v0.2s
+# CHECK-NEXT: 2 4 1.00 frintz v0.4h, v0.4h
+# CHECK-NEXT: 2 4 1.00 frintz v0.4s, v0.4s
+# CHECK-NEXT: 4 6 2.00 frintz v0.8h, v0.8h
+# CHECK-NEXT: 1 3 0.50 frsqrte d21, d12
+# CHECK-NEXT: 1 3 0.50 frsqrte s22, s13
+# CHECK-NEXT: 1 2 0.25 frsqrte v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.50 frsqrte v0.2s, v0.2s
+# CHECK-NEXT: 2 4 1.00 frsqrte v0.4h, v0.4h
+# CHECK-NEXT: 2 4 1.00 frsqrte v0.4s, v0.4s
+# CHECK-NEXT: 4 6 2.00 frsqrte v0.8h, v0.8h
+# CHECK-NEXT: 1 4 0.25 frsqrts d8, d22, d18
+# CHECK-NEXT: 1 4 0.25 frsqrts s21, s5, s12
+# CHECK-NEXT: 1 4 0.25 frsqrts v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 14 2.00 fsqrt v0.2d, v0.2d
+# CHECK-NEXT: 1 9 2.00 fsqrt v0.2s, v0.2s
+# CHECK-NEXT: 1 9 4.00 fsqrt v0.4h, v0.4h
+# CHECK-NEXT: 1 11 4.00 fsqrt v0.4s, v0.4s
+# CHECK-NEXT: 1 13 8.00 fsqrt v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.25 fsub v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 6 0.33 * ld1 { v0.16b }, [x0]
+# CHECK-NEXT: 4 6 1.00 * ld1 { v0.2d, v1.2d, v2.2d }, [x0], #48
+# CHECK-NEXT: 4 7 1.33 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
+# CHECK-NEXT: 3 6 0.67 * ld1 { v0.4s, v1.4s }, [sp], #32
+# CHECK-NEXT: 3 6 1.00 * ld1 { v0.4s, v1.4s, v2.4s }, [sp]
+# CHECK-NEXT: 5 7 1.33 * ld1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3
+# CHECK-NEXT: 2 6 0.33 * ld1 { v0.8h }, [x15], x2
+# CHECK-NEXT: 2 6 0.67 * ld1 { v0.8h, v1.8h }, [x15]
+# CHECK-NEXT: 2 8 0.33 * ld1 { v0.b }[9], [x0]
+# CHECK-NEXT: 3 8 0.33 * ld1 { v0.b }[9], [x0], #1
+# CHECK-NEXT: 2 8 0.33 * ld1r { v0.16b }, [x0]
+# CHECK-NEXT: 3 8 0.33 * ld1r { v0.16b }, [x0], #1
+# CHECK-NEXT: 2 8 0.33 * ld1r { v0.8h }, [x15]
+# CHECK-NEXT: 3 8 0.33 * ld1r { v0.8h }, [x15], #2
+# CHECK-NEXT: 5 8 0.67 * ld2 { v0.16b, v1.16b }, [x0], x1
+# CHECK-NEXT: 3 8 0.50 * ld2 { v0.8b, v1.8b }, [x0]
+# CHECK-NEXT: 3 8 0.50 * ld2 { v0.h, v1.h }[7], [x15]
+# CHECK-NEXT: 4 8 0.50 * ld2 { v0.h, v1.h }[7], [x15], #4
+# CHECK-NEXT: 3 8 0.50 * ld2r { v0.2d, v1.2d }, [x0]
+# CHECK-NEXT: 4 8 0.50 * ld2r { v0.2d, v1.2d }, [x0], #16
+# CHECK-NEXT: 3 8 0.50 * ld2r { v0.4s, v1.4s }, [sp]
+# CHECK-NEXT: 4 8 0.50 * ld2r { v0.4s, v1.4s }, [sp], #8
+# CHECK-NEXT: 5 8 0.75 * ld3 { v0.4h, v1.4h, v2.4h }, [x15]
+# CHECK-NEXT: 7 8 1.00 * ld3 { v0.8h, v1.8h, v2.8h }, [x15], x2
+# CHECK-NEXT: 5 8 0.75 * ld3 { v0.s, v1.s, v2.s }[3], [sp]
+# CHECK-NEXT: 6 8 0.75 * ld3 { v0.s, v1.s, v2.s }[3], [sp], x3
+# CHECK-NEXT: 5 8 0.75 * ld3r { v0.4h, v1.4h, v2.4h }, [x15]
+# CHECK-NEXT: 6 8 0.75 * ld3r { v0.4h, v1.4h, v2.4h }, [x15], #6
+# CHECK-NEXT: 5 8 0.75 * ld3r { v0.8b, v1.8b, v2.8b }, [x0]
+# CHECK-NEXT: 6 8 0.75 * ld3r { v0.8b, v1.8b, v2.8b }, [x0], #3
+# CHECK-NEXT: 7 8 1.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
+# CHECK-NEXT: 11 9 2.00 * ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64
+# CHECK-NEXT: 7 8 1.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0]
+# CHECK-NEXT: 8 8 1.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0], #32
+# CHECK-NEXT: 8 8 1.00 * ld4 { v0.h, v1.h, v2.h, v3.h }[7], [x0], x0
+# CHECK-NEXT: 7 8 1.00 * ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [sp]
+# CHECK-NEXT: 8 8 1.00 * ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [sp], x7
+# CHECK-NEXT: 7 8 1.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
+# CHECK-NEXT: 8 8 1.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x30
+# CHECK-NEXT: 1 4 0.50 mla v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 4 0.50 mls v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.25 mov b0, v0.b[15]
+# CHECK-NEXT: 1 2 0.25 mov d6, v0.d[1]
+# CHECK-NEXT: 1 2 0.25 mov h2, v0.h[5]
+# CHECK-NEXT: 1 2 0.25 mov s17, v0.s[2]
+# CHECK-NEXT: 1 2 0.25 mov v2.b[0], v0.b[0]
+# CHECK-NEXT: 1 2 0.25 mov v2.h[1], v0.h[1]
+# CHECK-NEXT: 1 2 0.25 mov v2.s[2], v0.s[2]
+# CHECK-NEXT: 1 2 0.25 mov v2.d[1], v0.d[1]
+# CHECK-NEXT: 2 5 1.00 mov v0.b[0], w8
+# CHECK-NEXT: 2 5 1.00 mov v0.h[1], w8
+# CHECK-NEXT: 2 5 1.00 mov v0.s[2], w8
+# CHECK-NEXT: 2 5 1.00 mov v0.d[1], x8
+# CHECK-NEXT: 1 2 0.25 mov v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 mov v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.25 movi d15, #0xff00ff00ff00ff
+# CHECK-NEXT: 1 2 0.25 movi v0.16b, #31
+# CHECK-NEXT: 1 2 0.25 movi v0.2d, #0xff0000ff0000ffff
+# CHECK-NEXT: 1 2 0.25 movi v0.2s, #8, msl #8
+# CHECK-NEXT: 1 2 0.25 movi v0.4s, #255, lsl #24
+# CHECK-NEXT: 1 2 0.25 movi v0.8b, #255
+# CHECK-NEXT: 1 4 0.50 mul v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.25 mvni v0.2s, #0
+# CHECK-NEXT: 1 2 0.25 mvni v0.4s, #16, msl #16
+# CHECK-NEXT: 1 2 0.25 neg d29, d24
+# CHECK-NEXT: 1 2 0.25 neg v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 neg v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.25 neg v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.25 neg v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.25 neg v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 neg v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.25 neg v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.25 mvn v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 mvn v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.25 orn v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 mov v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 orr v0.8h, #31
+# CHECK-NEXT: 1 3 0.25 pmul v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 3 0.25 pmul v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 3 0.25 pmull v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: 1 3 0.25 pmull2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 raddhn v0.2s, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.25 raddhn v0.4h, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 raddhn v0.8b, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.25 raddhn2 v0.16b, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.25 raddhn2 v0.4s, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.25 raddhn2 v0.8h, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 rbit v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 rbit v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.25 rev16 v21.8b, v1.8b
+# CHECK-NEXT: 1 2 0.25 rev16 v30.16b, v31.16b
+# CHECK-NEXT: 1 2 0.25 rev32 v0.4h, v9.4h
+# CHECK-NEXT: 1 2 0.25 rev32 v21.8b, v1.8b
+# CHECK-NEXT: 1 2 0.25 rev32 v30.16b, v31.16b
+# CHECK-NEXT: 1 2 0.25 rev32 v4.8h, v7.8h
+# CHECK-NEXT: 1 2 0.25 rev64 v0.16b, v31.16b
+# CHECK-NEXT: 1 2 0.25 rev64 v1.8b, v9.8b
+# CHECK-NEXT: 1 2 0.25 rev64 v13.4h, v21.4h
+# CHECK-NEXT: 1 2 0.25 rev64 v2.8h, v4.8h
+# CHECK-NEXT: 1 2 0.25 rev64 v4.2s, v0.2s
+# CHECK-NEXT: 1 2 0.25 rev64 v6.4s, v8.4s
+# CHECK-NEXT: 1 4 0.25 rshrn v0.2s, v0.2d, #3
+# CHECK-NEXT: 1 4 0.25 rshrn v0.4h, v0.4s, #3
+# CHECK-NEXT: 1 4 0.25 rshrn v0.8b, v0.8h, #3
+# CHECK-NEXT: 1 4 0.25 rshrn2 v0.16b, v0.8h, #3
+# CHECK-NEXT: 1 4 0.25 rshrn2 v0.4s, v0.2d, #3
+# CHECK-NEXT: 1 4 0.25 rshrn2 v0.8h, v0.4s, #3
+# CHECK-NEXT: 1 2 0.25 rsubhn v0.2s, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.25 rsubhn v0.4h, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 rsubhn v0.8b, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.25 rsubhn2 v0.16b, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.25 rsubhn2 v0.4s, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.25 rsubhn2 v0.8h, v0.4s, v0.4s
+# CHECK-NEXT: 1 4 0.25 saba v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 4 0.25 sabal v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: 1 4 0.25 sabal v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: 1 4 0.25 sabal v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: 1 4 0.25 sabal2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: 1 4 0.25 sabal2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: 1 4 0.25 sabal2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 sabd v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.25 sabdl v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.25 sabdl v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.25 sabdl v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.25 sabdl2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 sabdl2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.25 sabdl2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: 1 4 0.25 sadalp v0.1d, v0.2s
+# CHECK-NEXT: 1 4 0.25 sadalp v0.2d, v0.4s
+# CHECK-NEXT: 1 4 0.25 sadalp v0.2s, v0.4h
+# CHECK-NEXT: 1 4 0.25 sadalp v0.4h, v0.8b
+# CHECK-NEXT: 1 4 0.25 sadalp v0.4s, v0.8h
+# CHECK-NEXT: 1 4 0.25 sadalp v0.8h, v0.16b
+# CHECK-NEXT: 1 2 0.25 saddl v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.25 saddl v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.25 saddl v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.25 saddl2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 saddl2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.25 saddl2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 saddlp v0.1d, v0.2s
+# CHECK-NEXT: 1 2 0.25 saddlp v0.2d, v0.4s
+# CHECK-NEXT: 1 2 0.25 saddlp v0.2s, v0.4h
+# CHECK-NEXT: 1 2 0.25 saddlp v0.4h, v0.8b
+# CHECK-NEXT: 1 2 0.25 saddlp v0.4s, v0.8h
+# CHECK-NEXT: 1 2 0.25 saddlp v0.8h, v0.16b
+# CHECK-NEXT: 1 3 0.50 saddlv d0, v0.4s
+# CHECK-NEXT: 1 3 0.50 saddlv s0, v0.4h
+# CHECK-NEXT: 2 5 0.50 saddlv s0, v0.8h
+# CHECK-NEXT: 2 5 0.50 saddlv h0, v0.8b
+# CHECK-NEXT: 2 6 1.00 saddlv h0, v0.16b
+# CHECK-NEXT: 1 2 0.25 saddw v0.2d, v0.2d, v0.2s
+# CHECK-NEXT: 1 2 0.25 saddw v0.4s, v0.4s, v0.4h
+# CHECK-NEXT: 1 2 0.25 saddw v0.8h, v0.8h, v0.8b
+# CHECK-NEXT: 1 2 0.25 saddw2 v0.2d, v0.2d, v0.4s
+# CHECK-NEXT: 1 2 0.25 saddw2 v0.4s, v0.4s, v0.8h
+# CHECK-NEXT: 1 2 0.25 saddw2 v0.8h, v0.8h, v0.16b
+# CHECK-NEXT: 1 3 0.50 scvtf d21, d12
+# CHECK-NEXT: 1 3 0.50 scvtf d21, d12, #64
+# CHECK-NEXT: 2 4 1.00 scvtf s22, s13
+# CHECK-NEXT: 2 4 1.00 scvtf s22, s13, #32
+# CHECK-NEXT: 1 3 0.50 scvtf v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.50 scvtf v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 3 0.50 scvtf v0.2s, v0.2s
+# CHECK-NEXT: 1 3 0.50 scvtf v0.2s, v0.2s, #3
+# CHECK-NEXT: 2 4 1.00 scvtf v0.4h, v0.4h
+# CHECK-NEXT: 2 4 1.00 scvtf v0.4s, v0.4s
+# CHECK-NEXT: 2 4 1.00 scvtf v0.4s, v0.4s, #3
+# CHECK-NEXT: 4 6 2.00 scvtf v0.8h, v0.8h
+# CHECK-NEXT: 1 3 0.25 sdot v0.2s, v0.8b, v0.4b[2]
+# CHECK-NEXT: 1 3 0.25 sdot v0.2s, v0.8b, v0.8b
+# CHECK-NEXT: 1 3 0.25 sdot v0.4s, v0.16b, v0.16b
+# CHECK-NEXT: 1 3 0.25 sdot v0.4s, v0.16b, v0.4b[2]
+# CHECK-NEXT: 1 2 0.25 shadd v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.25 shl d7, d10, #12
+# CHECK-NEXT: 1 2 0.25 shl v0.16b, v0.16b, #3
+# CHECK-NEXT: 1 2 0.25 shl v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 2 0.25 shl v0.4h, v0.4h, #3
+# CHECK-NEXT: 1 2 0.25 shl v0.4s, v0.4s, #3
+# CHECK-NEXT: 1 2 0.25 shll v0.2d, v0.2s, #32
+# CHECK-NEXT: 1 2 0.25 shll v0.4s, v0.4h, #16
+# CHECK-NEXT: 1 2 0.25 shll v0.8h, v0.8b, #8
+# CHECK-NEXT: 1 2 0.25 shll v0.2d, v0.2s, #32
+# CHECK-NEXT: 1 2 0.25 shll v0.4s, v0.4h, #16
+# CHECK-NEXT: 1 2 0.25 shll v0.8h, v0.8b, #8
+# CHECK-NEXT: 1 2 0.25 shll2 v0.2d, v0.4s, #32
+# CHECK-NEXT: 1 2 0.25 shll2 v0.4s, v0.8h, #16
+# CHECK-NEXT: 1 2 0.25 shll2 v0.8h, v0.16b, #8
+# CHECK-NEXT: 1 2 0.25 shll2 v0.2d, v0.4s, #32
+# CHECK-NEXT: 1 2 0.25 shll2 v0.4s, v0.8h, #16
+# CHECK-NEXT: 1 2 0.25 shll2 v0.8h, v0.16b, #8
+# CHECK-NEXT: 1 2 0.25 shrn v0.2s, v0.2d, #3
+# CHECK-NEXT: 1 2 0.25 shrn v0.4h, v0.4s, #3
+# CHECK-NEXT: 1 2 0.25 shrn v0.8b, v0.8h, #3
+# CHECK-NEXT: 1 2 0.25 shrn2 v0.16b, v0.8h, #3
+# CHECK-NEXT: 1 2 0.25 shrn2 v0.4s, v0.2d, #3
+# CHECK-NEXT: 1 2 0.25 shrn2 v0.8h, v0.4s, #3
+# CHECK-NEXT: 1 2 0.25 shsub v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.25 shsub v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.25 sli d10, d14, #12
+# CHECK-NEXT: 1 2 0.25 sli v0.16b, v0.16b, #3
+# CHECK-NEXT: 1 2 0.25 sli v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 2 0.25 sli v0.2s, v0.2s, #3
+# CHECK-NEXT: 1 2 0.25 sli v0.4h, v0.4h, #3
+# CHECK-NEXT: 1 2 0.25 sli v0.4s, v0.4s, #3
+# CHECK-NEXT: 1 2 0.25 sli v0.8b, v0.8b, #3
+# CHECK-NEXT: 1 2 0.25 sli v0.8h, v0.8h, #3
+# CHECK-NEXT: 1 2 0.25 smax v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.25 smax v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.25 smax v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.25 smaxp v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.25 smaxp v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.25 smaxp v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 2 5 0.50 smaxv b0, v0.8b
+# CHECK-NEXT: 2 6 1.00 smaxv b0, v0.16b
+# CHECK-NEXT: 1 3 0.50 smaxv h0, v0.4h
+# CHECK-NEXT: 2 5 0.50 smaxv h0, v0.8h
+# CHECK-NEXT: 1 3 0.50 smaxv s0, v0.4s
+# CHECK-NEXT: 1 2 0.25 smin v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 smin v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 smin v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.25 sminp v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 sminp v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 sminp v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: 2 5 0.50 sminv b0, v0.8b
+# CHECK-NEXT: 2 6 1.00 sminv b0, v0.16b
+# CHECK-NEXT: 1 3 0.50 sminv h0, v0.4h
+# CHECK-NEXT: 2 5 0.50 sminv h0, v0.8h
+# CHECK-NEXT: 1 3 0.50 sminv s0, v0.4s
+# CHECK-NEXT: 1 4 0.50 smlal v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: 1 4 0.50 smlal v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: 1 4 0.50 smlal v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: 1 4 0.50 smlal2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: 1 4 0.50 smlal2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: 1 4 0.50 smlal2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: 1 4 0.50 smlsl v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: 1 4 0.50 smlsl v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: 1 4 0.50 smlsl v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: 1 4 0.50 smlsl2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: 1 4 0.50 smlsl2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: 1 4 0.50 smlsl2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: 1 3 0.25 smmla v0.4s, v0.16b, v0.16b
+# CHECK-NEXT: 1 3 0.50 smull v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: 1 3 0.50 smull v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: 1 3 0.50 smull v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: 1 3 0.50 smull2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: 1 3 0.50 smull2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: 1 3 0.50 smull2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 sqabs b19, b14
+# CHECK-NEXT: 1 2 0.25 sqabs d18, d12
+# CHECK-NEXT: 1 2 0.25 sqabs h21, h15
+# CHECK-NEXT: 1 2 0.25 sqabs s20, s12
+# CHECK-NEXT: 1 2 0.25 sqabs v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 sqabs v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.25 sqabs v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.25 sqabs v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.25 sqabs v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 sqabs v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.25 sqabs v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.25 sqadd b20, b11, b15
+# CHECK-NEXT: 1 2 0.25 sqadd v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 sqadd v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 4 0.50 sqdmlal d19, s24, s12
+# CHECK-NEXT: 1 4 0.50 sqdmlal d8, s9, v0.s[1]
+# CHECK-NEXT: 1 4 0.50 sqdmlal s0, h0, v0.h[3]
+# CHECK-NEXT: 1 4 0.50 sqdmlal s17, h27, h12
+# CHECK-NEXT: 1 4 0.50 sqdmlal v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: 1 4 0.50 sqdmlal v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: 1 4 0.50 sqdmlal2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: 1 4 0.50 sqdmlal2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: 1 4 0.50 sqdmlsl d12, s23, s13
+# CHECK-NEXT: 1 4 0.50 sqdmlsl d8, s9, v0.s[1]
+# CHECK-NEXT: 1 4 0.50 sqdmlsl s0, h0, v0.h[3]
+# CHECK-NEXT: 1 4 0.50 sqdmlsl s14, h12, h25
+# CHECK-NEXT: 1 4 0.50 sqdmlsl v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: 1 4 0.50 sqdmlsl v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: 1 4 0.50 sqdmlsl2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: 1 4 0.50 sqdmlsl2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: 1 4 0.50 sqdmulh h10, h11, h12
+# CHECK-NEXT: 1 4 0.50 sqdmulh h7, h15, v0.h[3]
+# CHECK-NEXT: 1 4 0.50 sqdmulh s15, s14, v0.s[1]
+# CHECK-NEXT: 1 4 0.50 sqdmulh s20, s21, s2
+# CHECK-NEXT: 1 4 0.50 sqdmulh v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 4 0.50 sqdmulh v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 3 0.50 sqdmull d1, s1, v0.s[1]
+# CHECK-NEXT: 1 3 0.50 sqdmull d15, s22, s12
+# CHECK-NEXT: 1 3 0.50 sqdmull s1, h1, v0.h[3]
+# CHECK-NEXT: 1 3 0.50 sqdmull s12, h22, h12
+# CHECK-NEXT: 1 3 0.50 sqdmull v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: 1 3 0.50 sqdmull v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: 1 3 0.50 sqdmull2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: 1 3 0.50 sqdmull2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.25 sqneg b19, b14
+# CHECK-NEXT: 1 2 0.25 sqneg d18, d12
+# CHECK-NEXT: 1 2 0.25 sqneg h21, h15
+# CHECK-NEXT: 1 2 0.25 sqneg s20, s12
+# CHECK-NEXT: 1 2 0.25 sqneg v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 sqneg v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.25 sqneg v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.25 sqneg v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.25 sqneg v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 sqneg v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.25 sqneg v0.8h, v0.8h
+# CHECK-NEXT: 1 4 1.00 sqrdmlah h0, h1, v2.h[3]
+# CHECK-NEXT: 1 4 1.00 sqrdmlah v0.4h, v1.4h, v2.h[3]
+# CHECK-NEXT: 1 4 1.00 sqrdmlah v0.8h, v1.8h, v2.h[3]
+# CHECK-NEXT: 1 4 1.00 sqrdmlah s0, s1, v2.s[1]
+# CHECK-NEXT: 1 4 1.00 sqrdmlah v0.2s, v1.2s, v2.s[1]
+# CHECK-NEXT: 1 4 1.00 sqrdmlah v0.4s, v1.4s, v2.s[1]
+# CHECK-NEXT: 1 4 1.00 sqrdmlah h0, h1, h2
+# CHECK-NEXT: 1 4 1.00 sqrdmlah v0.4h, v1.4h, v2.4h
+# CHECK-NEXT: 1 4 1.00 sqrdmlah v0.8h, v1.8h, v2.8h
+# CHECK-NEXT: 1 4 1.00 sqrdmlah s0, s1, s2
+# CHECK-NEXT: 1 4 1.00 sqrdmlah v0.2s, v1.2s, v2.2s
+# CHECK-NEXT: 1 4 1.00 sqrdmlah v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: 1 4 1.00 sqrdmlsh h0, h1, v2.h[3]
+# CHECK-NEXT: 1 4 1.00 sqrdmlsh v0.4h, v1.4h, v2.h[3]
+# CHECK-NEXT: 1 4 1.00 sqrdmlsh v0.8h, v1.8h, v2.h[3]
+# CHECK-NEXT: 1 4 1.00 sqrdmlsh s0, s1, v2.s[1]
+# CHECK-NEXT: 1 4 1.00 sqrdmlsh v0.2s, v1.2s, v2.s[1]
+# CHECK-NEXT: 1 4 1.00 sqrdmlsh v0.4s, v1.4s, v2.s[1]
+# CHECK-NEXT: 1 4 1.00 sqrdmlsh h0, h1, h2
+# CHECK-NEXT: 1 4 1.00 sqrdmlsh v0.4h, v1.4h, v2.4h
+# CHECK-NEXT: 1 4 1.00 sqrdmlsh v0.8h, v1.8h, v2.8h
+# CHECK-NEXT: 1 4 1.00 sqrdmlsh s0, s1, s2
+# CHECK-NEXT: 1 4 1.00 sqrdmlsh v0.2s, v1.2s, v2.2s
+# CHECK-NEXT: 1 4 1.00 sqrdmlsh v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: 1 4 0.50 sqrdmulh h10, h11, h12
+# CHECK-NEXT: 1 4 0.50 sqrdmulh h7, h15, v0.h[3]
+# CHECK-NEXT: 1 4 0.50 sqrdmulh s15, s14, v0.s[1]
+# CHECK-NEXT: 1 4 0.50 sqrdmulh s20, s21, s2
+# CHECK-NEXT: 1 4 0.50 sqrdmulh v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 4 0.50 sqrdmulh v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: 1 4 0.25 sqrshl d31, d31, d31
+# CHECK-NEXT: 1 4 0.25 sqrshl h3, h4, h15
+# CHECK-NEXT: 1 4 0.25 sqrshl v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 4 0.25 sqrshl v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 4 0.25 sqrshl v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 4 0.25 sqrshrn b10, h13, #2
+# CHECK-NEXT: 1 4 0.25 sqrshrn h15, s10, #6
+# CHECK-NEXT: 1 4 0.25 sqrshrn s15, d12, #9
+# CHECK-NEXT: 1 4 0.25 sqrshrn v0.2s, v0.2d, #3
+# CHECK-NEXT: 1 4 0.25 sqrshrn v0.4h, v0.4s, #3
+# CHECK-NEXT: 1 4 0.25 sqrshrn v0.8b, v0.8h, #3
+# CHECK-NEXT: 1 4 0.25 sqrshrn2 v0.16b, v0.8h, #3
+# CHECK-NEXT: 1 4 0.25 sqrshrn2 v0.4s, v0.2d, #3
+# CHECK-NEXT: 1 4 0.25 sqrshrn2 v0.8h, v0.4s, #3
+# CHECK-NEXT: 1 4 0.25 sqrshrun b17, h10, #6
+# CHECK-NEXT: 1 4 0.25 sqrshrun h10, s13, #15
+# CHECK-NEXT: 1 4 0.25 sqrshrun s22, d16, #31
+# CHECK-NEXT: 1 4 0.25 sqrshrun v0.2s, v0.2d, #3
+# CHECK-NEXT: 1 4 0.25 sqrshrun v0.4h, v0.4s, #3
+# CHECK-NEXT: 1 4 0.25 sqrshrun v0.8b, v0.8h, #3
+# CHECK-NEXT: 1 4 0.25 sqrshrun2 v0.16b, v0.8h, #3
+# CHECK-NEXT: 1 4 0.25 sqrshrun2 v0.4s, v0.2d, #3
+# CHECK-NEXT: 1 4 0.25 sqrshrun2 v0.8h, v0.4s, #3
+# CHECK-NEXT: 1 4 0.25 sqshl b11, b19, #7
+# CHECK-NEXT: 1 4 0.25 sqshl d15, d16, #51
+# CHECK-NEXT: 1 4 0.25 sqshl d31, d31, d31
+# CHECK-NEXT: 1 4 0.25 sqshl h13, h18, #11
+# CHECK-NEXT: 1 4 0.25 sqshl h3, h4, h15
+# CHECK-NEXT: 1 4 0.25 sqshl s14, s17, #22
+# CHECK-NEXT: 1 4 0.25 sqshl v0.16b, v0.16b, #3
+# CHECK-NEXT: 1 4 0.25 sqshl v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 4 0.25 sqshl v0.2s, v0.2s, #3
+# CHECK-NEXT: 1 4 0.25 sqshl v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 4 0.25 sqshl v0.4h, v0.4h, #3
+# CHECK-NEXT: 1 4 0.25 sqshl v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 4 0.25 sqshl v0.4s, v0.4s, #3
+# CHECK-NEXT: 1 4 0.25 sqshl v0.8b, v0.8b, #3
+# CHECK-NEXT: 1 4 0.25 sqshl v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 4 0.25 sqshl v0.8h, v0.8h, #3
+# CHECK-NEXT: 1 4 0.25 sqshlu b15, b18, #6
+# CHECK-NEXT: 1 4 0.25 sqshlu d11, d13, #32
+# CHECK-NEXT: 1 4 0.25 sqshlu h19, h17, #6
+# CHECK-NEXT: 1 4 0.25 sqshlu s16, s14, #25
+# CHECK-NEXT: 1 4 0.25 sqshlu v0.16b, v0.16b, #3
+# CHECK-NEXT: 1 4 0.25 sqshlu v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 4 0.25 sqshlu v0.2s, v0.2s, #3
+# CHECK-NEXT: 1 4 0.25 sqshlu v0.4h, v0.4h, #3
+# CHECK-NEXT: 1 4 0.25 sqshlu v0.4s, v0.4s, #3
+# CHECK-NEXT: 1 4 0.25 sqshlu v0.8b, v0.8b, #3
+# CHECK-NEXT: 1 4 0.25 sqshlu v0.8h, v0.8h, #3
+# CHECK-NEXT: 1 4 0.25 sqshrn b10, h15, #5
+# CHECK-NEXT: 1 4 0.25 sqshrn h17, s10, #4
+# CHECK-NEXT: 1 4 0.25 sqshrn s18, d10, #31
+# CHECK-NEXT: 1 4 0.25 sqshrn v0.2s, v0.2d, #3
+# CHECK-NEXT: 1 4 0.25 sqshrn v0.4h, v0.4s, #3
+# CHECK-NEXT: 1 4 0.25 sqshrn v0.8b, v0.8h, #3
+# CHECK-NEXT: 1 4 0.25 sqshrn2 v0.16b, v0.8h, #3
+# CHECK-NEXT: 1 4 0.25 sqshrn2 v0.4s, v0.2d, #3
+# CHECK-NEXT: 1 4 0.25 sqshrn2 v0.8h, v0.4s, #3
+# CHECK-NEXT: 1 4 0.25 sqshrun b15, h10, #7
+# CHECK-NEXT: 1 4 0.25 sqshrun h20, s14, #3
+# CHECK-NEXT: 1 4 0.25 sqshrun s10, d15, #15
+# CHECK-NEXT: 1 4 0.25 sqshrun v0.2s, v0.2d, #3
+# CHECK-NEXT: 1 4 0.25 sqshrun v0.4h, v0.4s, #3
+# CHECK-NEXT: 1 4 0.25 sqshrun v0.8b, v0.8h, #3
+# CHECK-NEXT: 1 4 0.25 sqshrun2 v0.16b, v0.8h, #3
+# CHECK-NEXT: 1 4 0.25 sqshrun2 v0.4s, v0.2d, #3
+# CHECK-NEXT: 1 4 0.25 sqshrun2 v0.8h, v0.4s, #3
+# CHECK-NEXT: 1 2 0.25 sqsub s20, s10, s7
+# CHECK-NEXT: 1 2 0.25 sqsub v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.25 sqsub v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 sqsub v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 4 0.25 sqxtn b18, h18
+# CHECK-NEXT: 1 4 0.25 sqxtn h20, s17
+# CHECK-NEXT: 1 4 0.25 sqxtn s19, d14
+# CHECK-NEXT: 1 4 0.25 sqxtn v0.2s, v0.2d
+# CHECK-NEXT: 1 4 0.25 sqxtn v0.4h, v0.4s
+# CHECK-NEXT: 1 4 0.25 sqxtn v0.8b, v0.8h
+# CHECK-NEXT: 1 4 0.25 sqxtn2 v0.16b, v0.8h
+# CHECK-NEXT: 1 4 0.25 sqxtn2 v0.4s, v0.2d
+# CHECK-NEXT: 1 4 0.25 sqxtn2 v0.8h, v0.4s
+# CHECK-NEXT: 1 4 0.25 sqxtun b19, h14
+# CHECK-NEXT: 1 4 0.25 sqxtun h21, s15
+# CHECK-NEXT: 1 4 0.25 sqxtun s20, d12
+# CHECK-NEXT: 1 4 0.25 sqxtun v0.2s, v0.2d
+# CHECK-NEXT: 1 4 0.25 sqxtun v0.4h, v0.4s
+# CHECK-NEXT: 1 4 0.25 sqxtun v0.8b, v0.8h
+# CHECK-NEXT: 1 4 0.25 sqxtun2 v0.16b, v0.8h
+# CHECK-NEXT: 1 4 0.25 sqxtun2 v0.4s, v0.2d
+# CHECK-NEXT: 1 4 0.25 sqxtun2 v0.8h, v0.4s
+# CHECK-NEXT: 1 2 0.25 srhadd v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.25 srhadd v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.25 srhadd v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.25 sri d10, d12, #14
+# CHECK-NEXT: 1 2 0.25 sri v0.16b, v0.16b, #3
+# CHECK-NEXT: 1 2 0.25 sri v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 2 0.25 sri v0.2s, v0.2s, #3
+# CHECK-NEXT: 1 2 0.25 sri v0.4h, v0.4h, #3
+# CHECK-NEXT: 1 2 0.25 sri v0.4s, v0.4s, #3
+# CHECK-NEXT: 1 2 0.25 sri v0.8b, v0.8b, #3
+# CHECK-NEXT: 1 2 0.25 sri v0.8h, v0.8h, #3
+# CHECK-NEXT: 1 4 0.25 srshl d16, d16, d16
+# CHECK-NEXT: 1 4 0.25 srshl v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 4 0.25 srshl v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 4 0.25 srshl v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 4 0.25 srshr d19, d18, #7
+# CHECK-NEXT: 1 4 0.25 srshr v0.16b, v0.16b, #3
+# CHECK-NEXT: 1 4 0.25 srshr v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 4 0.25 srshr v0.2s, v0.2s, #3
+# CHECK-NEXT: 1 4 0.25 srshr v0.4h, v0.4h, #3
+# CHECK-NEXT: 1 4 0.25 srshr v0.4s, v0.4s, #3
+# CHECK-NEXT: 1 4 0.25 srshr v0.8b, v0.8b, #3
+# CHECK-NEXT: 1 4 0.25 srshr v0.8h, v0.8h, #3
+# CHECK-NEXT: 1 4 0.25 srsra d15, d11, #19
+# CHECK-NEXT: 1 4 0.25 srsra v0.16b, v0.16b, #3
+# CHECK-NEXT: 1 4 0.25 srsra v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 4 0.25 srsra v0.2s, v0.2s, #3
+# CHECK-NEXT: 1 4 0.25 srsra v0.4h, v0.4h, #3
+# CHECK-NEXT: 1 4 0.25 srsra v0.4s, v0.4s, #3
+# CHECK-NEXT: 1 4 0.25 srsra v0.8b, v0.8b, #3
+# CHECK-NEXT: 1 4 0.25 srsra v0.8h, v0.8h, #3
+# CHECK-NEXT: 1 2 0.25 sshl d31, d31, d31
+# CHECK-NEXT: 1 2 0.25 sshl v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.25 sshl v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.25 sshl v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.25 sshl v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.25 sshll v0.2d, v0.2s, #3
+# CHECK-NEXT: 1 2 0.25 sshll2 v0.4s, v0.8h, #3
+# CHECK-NEXT: 1 2 0.25 sshr d15, d16, #12
+# CHECK-NEXT: 1 2 0.25 sshr v0.16b, v0.16b, #3
+# CHECK-NEXT: 1 2 0.25 sshr v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 2 0.25 sshr v0.2s, v0.2s, #3
+# CHECK-NEXT: 1 2 0.25 sshr v0.4h, v0.4h, #3
+# CHECK-NEXT: 1 2 0.25 sshr v0.4s, v0.4s, #3
+# CHECK-NEXT: 1 2 0.25 sshr v0.8b, v0.8b, #3
+# CHECK-NEXT: 1 2 0.25 sshr v0.8h, v0.8h, #3
+# CHECK-NEXT: 1 4 0.25 ssra d18, d12, #21
+# CHECK-NEXT: 1 4 0.25 ssra v0.16b, v0.16b, #3
+# CHECK-NEXT: 1 4 0.25 ssra v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 4 0.25 ssra v0.2s, v0.2s, #3
+# CHECK-NEXT: 1 4 0.25 ssra v0.4h, v0.4h, #3
+# CHECK-NEXT: 1 4 0.25 ssra v0.4s, v0.4s, #3
+# CHECK-NEXT: 1 4 0.25 ssra v0.8b, v0.8b, #3
+# CHECK-NEXT: 1 4 0.25 ssra v0.8h, v0.8h, #3
+# CHECK-NEXT: 1 2 0.25 ssubl v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.25 ssubl v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.25 ssubl v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.25 ssubl2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 ssubl2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.25 ssubl2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 ssubw v0.2d, v0.2d, v0.2s
+# CHECK-NEXT: 1 2 0.25 ssubw v0.4s, v0.4s, v0.4h
+# CHECK-NEXT: 1 2 0.25 ssubw v0.8h, v0.8h, v0.8b
+# CHECK-NEXT: 1 2 0.25 ssubw2 v0.2d, v0.2d, v0.4s
+# CHECK-NEXT: 1 2 0.25 ssubw2 v0.4s, v0.4s, v0.8h
+# CHECK-NEXT: 1 2 0.25 ssubw2 v0.8h, v0.8h, v0.16b
+# CHECK-NEXT: 2 2 0.50 * st1 { v0.16b }, [x0]
+# CHECK-NEXT: 7 2 1.50 * st1 { v0.2d, v1.2d, v2.2d }, [x0], #48
+# CHECK-NEXT: 8 2 2.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
+# CHECK-NEXT: 5 2 1.00 * st1 { v0.4s, v1.4s }, [sp], #32
+# CHECK-NEXT: 6 2 1.50 * st1 { v0.4s, v1.4s, v2.4s }, [sp]
+# CHECK-NEXT: 5 2 1.00 * st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3
+# CHECK-NEXT: 3 2 0.50 * st1 { v0.8h }, [x15], x2
+# CHECK-NEXT: 4 2 1.00 * st1 { v0.8h, v1.8h }, [x15]
+# CHECK-NEXT: 3 4 1.00 * st1 { v0.d }[1], [x0]
+# CHECK-NEXT: 4 4 1.00 * st1 { v0.d }[1], [x0], #8
+# CHECK-NEXT: 7 4 2.00 * st2 { v0.16b, v1.16b }, [x0], x1
+# CHECK-NEXT: 3 4 1.00 * st2 { v0.8b, v1.8b }, [x0]
+# CHECK-NEXT: 3 4 1.00 * st2 { v0.s, v1.s }[3], [sp]
+# CHECK-NEXT: 4 4 1.00 * st2 { v0.s, v1.s }[3], [sp], #8
+# CHECK-NEXT: 6 5 2.00 * st3 { v0.4h, v1.4h, v2.4h }, [x15]
+# CHECK-NEXT: 10 6 3.00 * st3 { v0.8h, v1.8h, v2.8h }, [x15], x2
+# CHECK-NEXT: 6 5 2.00 * st3 { v0.h, v1.h, v2.h }[7], [x15]
+# CHECK-NEXT: 7 5 2.00 * st3 { v0.h, v1.h, v2.h }[7], [x15], #6
+# CHECK-NEXT: 8 6 3.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
+# CHECK-NEXT: 17 7 6.00 * st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64
+# CHECK-NEXT: 4 6 1.50 * st4 { v0.b, v1.b, v2.b, v3.b }[9], [x0]
+# CHECK-NEXT: 7 4 2.00 * st4 { v0.d, v1.d, v2.d, v3.d }[1], [x0], x5
+# CHECK-NEXT: 1 2 0.25 sub d15, d5, d16
+# CHECK-NEXT: 1 2 0.25 sub v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.25 sudot v0.2s, v0.8b, v0.4b[2]
+# CHECK-NEXT: 1 3 0.25 sudot v0.4s, v0.16b, v0.4b[2]
+# CHECK-NEXT: 1 2 0.25 suqadd b19, b14
+# CHECK-NEXT: 1 2 0.25 suqadd d18, d22
+# CHECK-NEXT: 1 2 0.25 suqadd h20, h15
+# CHECK-NEXT: 1 2 0.25 suqadd s21, s12
+# CHECK-NEXT: 1 2 0.25 suqadd v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 suqadd v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.25 suqadd v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.25 suqadd v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.25 suqadd v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 suqadd v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.25 suqadd v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.25 tbl v0.16b, { v0.16b }, v0.16b
+# CHECK-NEXT: 1 2 0.25 tbl v0.16b, { v0.16b, v1.16b }, v0.16b
+# CHECK-NEXT: 2 4 0.50 tbl v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b
+# CHECK-NEXT: 3 4 0.75 tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b
+# CHECK-NEXT: 1 2 0.25 tbl v0.8b, { v0.16b }, v0.8b
+# CHECK-NEXT: 1 2 0.25 tbl v0.8b, { v0.16b, v1.16b }, v0.8b
+# CHECK-NEXT: 2 4 0.50 tbl v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b
+# CHECK-NEXT: 3 4 0.75 tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b
+# CHECK-NEXT: 1 2 0.25 tbx v0.16b, { v0.16b }, v0.16b
+# CHECK-NEXT: 2 4 0.50 tbx v0.16b, { v0.16b, v1.16b }, v0.16b
+# CHECK-NEXT: 3 6 0.75 tbx v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b
+# CHECK-NEXT: 5 6 1.25 tbx v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b
+# CHECK-NEXT: 1 2 0.25 tbx v0.8b, { v0.16b }, v0.8b
+# CHECK-NEXT: 2 4 0.50 tbx v0.8b, { v0.16b, v1.16b }, v0.8b
+# CHECK-NEXT: 3 6 0.75 tbx v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b
+# CHECK-NEXT: 5 6 1.25 tbx v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b
+# CHECK-NEXT: 1 2 0.25 trn1 v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 trn1 v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.25 trn1 v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.25 trn1 v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.25 trn1 v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 trn1 v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.25 trn1 v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.25 trn2 v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 trn2 v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.25 trn2 v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.25 trn2 v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.25 trn2 v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 trn2 v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.25 trn2 v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: 1 4 0.25 uaba v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 4 0.25 uabal v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: 1 4 0.25 uabal v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: 1 4 0.25 uabal v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: 1 4 0.25 uabal2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: 1 4 0.25 uabal2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: 1 4 0.25 uabal2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 uabd v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.25 uabdl v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.25 uabdl v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.25 uabdl v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.25 uabdl2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 uabdl2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.25 uabdl2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: 1 4 0.25 uadalp v0.1d, v0.2s
+# CHECK-NEXT: 1 4 0.25 uadalp v0.2d, v0.4s
+# CHECK-NEXT: 1 4 0.25 uadalp v0.2s, v0.4h
+# CHECK-NEXT: 1 4 0.25 uadalp v0.4h, v0.8b
+# CHECK-NEXT: 1 4 0.25 uadalp v0.4s, v0.8h
+# CHECK-NEXT: 1 4 0.25 uadalp v0.8h, v0.16b
+# CHECK-NEXT: 1 2 0.25 uaddl v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.25 uaddl v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.25 uaddl v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.25 uaddl2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 uaddl2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.25 uaddl2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 uaddlp v0.1d, v0.2s
+# CHECK-NEXT: 1 2 0.25 uaddlp v0.2d, v0.4s
+# CHECK-NEXT: 1 2 0.25 uaddlp v0.2s, v0.4h
+# CHECK-NEXT: 1 2 0.25 uaddlp v0.4h, v0.8b
+# CHECK-NEXT: 1 2 0.25 uaddlp v0.4s, v0.8h
+# CHECK-NEXT: 1 2 0.25 uaddlp v0.8h, v0.16b
+# CHECK-NEXT: 1 3 0.50 uaddlv d0, v0.4s
+# CHECK-NEXT: 1 3 0.50 uaddlv s0, v0.4h
+# CHECK-NEXT: 2 5 0.50 uaddlv s0, v0.8h
+# CHECK-NEXT: 2 5 0.50 uaddlv h0, v0.8b
+# CHECK-NEXT: 2 6 1.00 uaddlv h0, v0.16b
+# CHECK-NEXT: 1 2 0.25 uaddw v0.2d, v0.2d, v0.2s
+# CHECK-NEXT: 1 2 0.25 uaddw v0.4s, v0.4s, v0.4h
+# CHECK-NEXT: 1 2 0.25 uaddw v0.8h, v0.8h, v0.8b
+# CHECK-NEXT: 1 2 0.25 uaddw2 v0.2d, v0.2d, v0.4s
+# CHECK-NEXT: 1 2 0.25 uaddw2 v0.4s, v0.4s, v0.8h
+# CHECK-NEXT: 1 2 0.25 uaddw2 v0.8h, v0.8h, v0.16b
+# CHECK-NEXT: 1 3 0.50 ucvtf d21, d14
+# CHECK-NEXT: 1 3 0.50 ucvtf d21, d14, #64
+# CHECK-NEXT: 2 4 1.00 ucvtf s22, s13
+# CHECK-NEXT: 2 4 1.00 ucvtf s22, s13, #32
+# CHECK-NEXT: 1 3 0.50 ucvtf v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.50 ucvtf v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 3 0.50 ucvtf v0.2s, v0.2s
+# CHECK-NEXT: 1 3 0.50 ucvtf v0.2s, v0.2s, #3
+# CHECK-NEXT: 2 4 1.00 ucvtf v0.4h, v0.4h
+# CHECK-NEXT: 2 4 1.00 ucvtf v0.4s, v0.4s
+# CHECK-NEXT: 2 4 1.00 ucvtf v0.4s, v0.4s, #3
+# CHECK-NEXT: 4 6 2.00 ucvtf v0.8h, v0.8h
+# CHECK-NEXT: 1 3 0.25 udot v0.2s, v0.8b, v0.4b[2]
+# CHECK-NEXT: 1 3 0.25 udot v0.2s, v0.8b, v0.8b
+# CHECK-NEXT: 1 3 0.25 udot v0.4s, v0.16b, v0.16b
+# CHECK-NEXT: 1 3 0.25 udot v0.4s, v0.16b, v0.4b[2]
+# CHECK-NEXT: 1 2 0.25 uhadd v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 uhadd v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.25 uhsub v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 umax v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 umax v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 umax v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.25 umaxp v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 umaxp v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 umaxp v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: 2 5 0.50 umaxv b0, v0.8b
+# CHECK-NEXT: 2 6 1.00 umaxv b0, v0.16b
+# CHECK-NEXT: 1 3 0.50 umaxv h0, v0.4h
+# CHECK-NEXT: 2 5 0.50 umaxv h0, v0.8h
+# CHECK-NEXT: 1 3 0.50 umaxv s0, v0.4s
+# CHECK-NEXT: 1 2 0.25 umin v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.25 umin v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.25 umin v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.25 uminp v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.25 uminp v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.25 uminp v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 2 5 0.50 uminv b0, v0.8b
+# CHECK-NEXT: 2 6 1.00 uminv b0, v0.16b
+# CHECK-NEXT: 1 3 0.50 uminv h0, v0.4h
+# CHECK-NEXT: 2 5 0.50 uminv h0, v0.8h
+# CHECK-NEXT: 1 3 0.50 uminv s0, v0.4s
+# CHECK-NEXT: 1 4 0.50 umlal v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: 1 4 0.50 umlal v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: 1 4 0.50 umlal v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: 1 4 0.50 umlal2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: 1 4 0.50 umlal2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: 1 4 0.50 umlal2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: 1 4 0.50 umlsl v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: 1 4 0.50 umlsl v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: 1 4 0.50 umlsl v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: 1 4 0.50 umlsl2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: 1 4 0.50 umlsl2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: 1 4 0.50 umlsl2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: 1 3 0.25 ummla v0.4s, v0.16b, v0.16b
+# CHECK-NEXT: 2 2 1.00 umov w0, v0.b[1]
+# CHECK-NEXT: 2 2 1.00 umov w0, v0.h[1]
+# CHECK-NEXT: 2 2 1.00 mov w0, v0.s[1]
+# CHECK-NEXT: 2 2 1.00 mov x0, v0.d[1]
+# CHECK-NEXT: 1 3 0.50 umull v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: 1 3 0.50 umull v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: 1 3 0.50 umull v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: 1 3 0.50 umull2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: 1 3 0.50 umull2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: 1 3 0.50 umull2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 uqadd h0, h1, h5
+# CHECK-NEXT: 1 2 0.25 uqadd v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: 1 4 0.25 uqrshl b11, b20, b30
+# CHECK-NEXT: 1 4 0.25 uqrshl s23, s20, s16
+# CHECK-NEXT: 1 4 0.25 uqrshl v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 4 0.25 uqrshl v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 4 0.25 uqrshl v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 4 0.25 uqrshl v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: 1 4 0.25 uqrshrn b10, h12, #5
+# CHECK-NEXT: 1 4 0.25 uqrshrn h12, s10, #14
+# CHECK-NEXT: 1 4 0.25 uqrshrn s10, d10, #25
+# CHECK-NEXT: 1 4 0.25 uqrshrn v0.2s, v0.2d, #3
+# CHECK-NEXT: 1 4 0.25 uqrshrn v0.4h, v0.4s, #3
+# CHECK-NEXT: 1 4 0.25 uqrshrn v0.8b, v0.8h, #3
+# CHECK-NEXT: 1 4 0.25 uqrshrn2 v0.16b, v0.8h, #3
+# CHECK-NEXT: 1 4 0.25 uqrshrn2 v0.4s, v0.2d, #3
+# CHECK-NEXT: 1 4 0.25 uqrshrn2 v0.8h, v0.4s, #3
+# CHECK-NEXT: 1 4 0.25 uqshl b11, b20, b30
+# CHECK-NEXT: 1 4 0.25 uqshl b18, b15, #6
+# CHECK-NEXT: 1 4 0.25 uqshl d15, d12, #19
+# CHECK-NEXT: 1 4 0.25 uqshl h11, h18, #7
+# CHECK-NEXT: 1 4 0.25 uqshl s14, s19, #18
+# CHECK-NEXT: 1 4 0.25 uqshl s23, s20, s16
+# CHECK-NEXT: 1 4 0.25 uqshl v0.16b, v0.16b, #3
+# CHECK-NEXT: 1 4 0.25 uqshl v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 4 0.25 uqshl v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 4 0.25 uqshl v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 4 0.25 uqshl v0.2s, v0.2s, #3
+# CHECK-NEXT: 1 4 0.25 uqshl v0.4h, v0.4h, #3
+# CHECK-NEXT: 1 4 0.25 uqshl v0.4s, v0.4s, #3
+# CHECK-NEXT: 1 4 0.25 uqshl v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 4 0.25 uqshl v0.8b, v0.8b, #3
+# CHECK-NEXT: 1 4 0.25 uqshl v0.8h, v0.8h, #3
+# CHECK-NEXT: 1 4 0.25 uqshl v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: 1 4 0.25 uqshrn b12, h10, #7
+# CHECK-NEXT: 1 4 0.25 uqshrn h10, s14, #5
+# CHECK-NEXT: 1 4 0.25 uqshrn s10, d12, #13
+# CHECK-NEXT: 1 4 0.25 uqshrn v0.2s, v0.2d, #3
+# CHECK-NEXT: 1 4 0.25 uqshrn v0.4h, v0.4s, #3
+# CHECK-NEXT: 1 4 0.25 uqshrn v0.8b, v0.8h, #3
+# CHECK-NEXT: 1 4 0.25 uqshrn2 v0.16b, v0.8h, #3
+# CHECK-NEXT: 1 4 0.25 uqshrn2 v0.4s, v0.2d, #3
+# CHECK-NEXT: 1 4 0.25 uqshrn2 v0.8h, v0.4s, #3
+# CHECK-NEXT: 1 2 0.25 uqsub d16, d16, d16
+# CHECK-NEXT: 1 2 0.25 uqsub v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 4 0.25 uqxtn b18, h18
+# CHECK-NEXT: 1 4 0.25 uqxtn h20, s17
+# CHECK-NEXT: 1 4 0.25 uqxtn s19, d14
+# CHECK-NEXT: 1 4 0.25 uqxtn v0.2s, v0.2d
+# CHECK-NEXT: 1 4 0.25 uqxtn v0.4h, v0.4s
+# CHECK-NEXT: 1 4 0.25 uqxtn v0.8b, v0.8h
+# CHECK-NEXT: 1 4 0.25 uqxtn2 v0.16b, v0.8h
+# CHECK-NEXT: 1 4 0.25 uqxtn2 v0.4s, v0.2d
+# CHECK-NEXT: 1 4 0.25 uqxtn2 v0.8h, v0.4s
+# CHECK-NEXT: 1 3 0.50 urecpe v0.2s, v0.2s
+# CHECK-NEXT: 2 4 1.00 urecpe v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 urhadd v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 urhadd v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 urhadd v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: 1 4 0.25 urshl d8, d7, d4
+# CHECK-NEXT: 1 4 0.25 urshl v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 4 0.25 urshl v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 4 0.25 urshl v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 4 0.25 urshl v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: 1 4 0.25 urshr d20, d23, #31
+# CHECK-NEXT: 1 4 0.25 urshr v0.16b, v0.16b, #3
+# CHECK-NEXT: 1 4 0.25 urshr v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 4 0.25 urshr v0.2s, v0.2s, #3
+# CHECK-NEXT: 1 4 0.25 urshr v0.4h, v0.4h, #3
+# CHECK-NEXT: 1 4 0.25 urshr v0.4s, v0.4s, #3
+# CHECK-NEXT: 1 4 0.25 urshr v0.8b, v0.8b, #3
+# CHECK-NEXT: 1 4 0.25 urshr v0.8h, v0.8h, #3
+# CHECK-NEXT: 1 3 0.50 ursqrte v0.2s, v0.2s
+# CHECK-NEXT: 2 4 1.00 ursqrte v0.4s, v0.4s
+# CHECK-NEXT: 1 4 0.25 ursra d18, d10, #13
+# CHECK-NEXT: 1 4 0.25 ursra v0.16b, v0.16b, #3
+# CHECK-NEXT: 1 4 0.25 ursra v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 4 0.25 ursra v0.2s, v0.2s, #3
+# CHECK-NEXT: 1 4 0.25 ursra v0.4h, v0.4h, #3
+# CHECK-NEXT: 1 4 0.25 ursra v0.4s, v0.4s, #3
+# CHECK-NEXT: 1 4 0.25 ursra v0.8b, v0.8b, #3
+# CHECK-NEXT: 1 4 0.25 ursra v0.8h, v0.8h, #3
+# CHECK-NEXT: 1 3 0.25 usdot v0.2s, v0.8b, v0.4b[2]
+# CHECK-NEXT: 1 3 0.25 usdot v0.2s, v0.8b, v0.8b
+# CHECK-NEXT: 1 3 0.25 usdot v0.4s, v0.16b, v0.16b
+# CHECK-NEXT: 1 3 0.25 usdot v0.4s, v0.16b, v0.4b[2]
+# CHECK-NEXT: 1 2 0.25 ushl d0, d0, d0
+# CHECK-NEXT: 1 2 0.25 ushl v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 ushl v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 ushl v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.25 ushll v0.4s, v0.4h, #3
+# CHECK-NEXT: 1 2 0.25 ushll2 v0.8h, v0.16b, #3
+# CHECK-NEXT: 1 2 0.25 ushr d10, d17, #18
+# CHECK-NEXT: 1 2 0.25 ushr v0.16b, v0.16b, #3
+# CHECK-NEXT: 1 2 0.25 ushr v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 2 0.25 ushr v0.2s, v0.2s, #3
+# CHECK-NEXT: 1 2 0.25 ushr v0.4h, v0.4h, #3
+# CHECK-NEXT: 1 2 0.25 ushr v0.4s, v0.4s, #3
+# CHECK-NEXT: 1 2 0.25 ushr v0.8b, v0.8b, #3
+# CHECK-NEXT: 1 2 0.25 ushr v0.8h, v0.8h, #3
+# CHECK-NEXT: 1 3 0.25 usmmla v0.4s, v0.16b, v0.16b
+# CHECK-NEXT: 2 2 1.00 smov w0, v0.b[1]
+# CHECK-NEXT: 2 2 1.00 smov w0, v0.h[1]
+# CHECK-NEXT: 2 2 1.00 smov x0, v0.b[1]
+# CHECK-NEXT: 2 2 1.00 smov x0, v0.h[1]
+# CHECK-NEXT: 2 2 1.00 smov x0, v0.s[1]
+# CHECK-NEXT: 1 2 0.25 usqadd b19, b14
+# CHECK-NEXT: 1 2 0.25 usqadd d18, d22
+# CHECK-NEXT: 1 2 0.25 usqadd h20, h15
+# CHECK-NEXT: 1 2 0.25 usqadd s21, s12
+# CHECK-NEXT: 1 2 0.25 usqadd v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 usqadd v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.25 usqadd v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.25 usqadd v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.25 usqadd v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 usqadd v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.25 usqadd v0.8h, v0.8h
+# CHECK-NEXT: 1 4 0.25 usra d20, d13, #61
+# CHECK-NEXT: 1 4 0.25 usra v0.16b, v0.16b, #3
+# CHECK-NEXT: 1 4 0.25 usra v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 4 0.25 usra v0.2s, v0.2s, #3
+# CHECK-NEXT: 1 4 0.25 usra v0.4h, v0.4h, #3
+# CHECK-NEXT: 1 4 0.25 usra v0.4s, v0.4s, #3
+# CHECK-NEXT: 1 4 0.25 usra v0.8b, v0.8b, #3
+# CHECK-NEXT: 1 4 0.25 usra v0.8h, v0.8h, #3
+# CHECK-NEXT: 1 2 0.25 usubl v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.25 usubl v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.25 usubl v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.25 usubl2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 usubl2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.25 usubl2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 usubw v0.2d, v0.2d, v0.2s
+# CHECK-NEXT: 1 2 0.25 usubw v0.4s, v0.4s, v0.4h
+# CHECK-NEXT: 1 2 0.25 usubw v0.8h, v0.8h, v0.8b
+# CHECK-NEXT: 1 2 0.25 usubw2 v0.2d, v0.2d, v0.4s
+# CHECK-NEXT: 1 2 0.25 usubw2 v0.4s, v0.4s, v0.8h
+# CHECK-NEXT: 1 2 0.25 usubw2 v0.8h, v0.8h, v0.16b
+# CHECK-NEXT: 1 2 0.25 uzp1 v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 uzp1 v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.25 uzp1 v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.25 uzp1 v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.25 uzp1 v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 uzp1 v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.25 uzp1 v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.25 uzp2 v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 uzp2 v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.25 uzp2 v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.25 uzp2 v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.25 uzp2 v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 uzp2 v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.25 uzp2 v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.25 xtn v0.2s, v0.2d
+# CHECK-NEXT: 1 2 0.25 xtn v0.4h, v0.4s
+# CHECK-NEXT: 1 2 0.25 xtn v0.8b, v0.8h
+# CHECK-NEXT: 1 2 0.25 xtn2 v0.16b, v0.8h
+# CHECK-NEXT: 1 2 0.25 xtn2 v0.4s, v0.2d
+# CHECK-NEXT: 1 2 0.25 xtn2 v0.8h, v0.4s
+# CHECK-NEXT: 1 2 0.25 zip1 v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 zip1 v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.25 zip1 v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.25 zip1 v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.25 zip1 v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 zip1 v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.25 zip1 v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.25 zip2 v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.25 zip2 v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.25 zip2 v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.25 zip2 v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.25 zip2 v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.25 zip2 v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.25 zip2 v0.8h, v0.8h, v0.8h
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - V3UnitB
+# CHECK-NEXT: [0.1] - V3UnitB
+# CHECK-NEXT: [0.2] - V3UnitB
+# CHECK-NEXT: [1.0] - V3UnitD
+# CHECK-NEXT: [1.1] - V3UnitD
+# CHECK-NEXT: [2.0] - V3UnitFlg
+# CHECK-NEXT: [2.1] - V3UnitFlg
+# CHECK-NEXT: [2.2] - V3UnitFlg
+# CHECK-NEXT: [2.3] - V3UnitFlg
+# CHECK-NEXT: [3.0] - V3UnitL12
+# CHECK-NEXT: [3.1] - V3UnitL12
+# CHECK-NEXT: [4] - V3UnitLS0
+# CHECK-NEXT: [5] - V3UnitM0
+# CHECK-NEXT: [6] - V3UnitM1
+# CHECK-NEXT: [7] - V3UnitS0
+# CHECK-NEXT: [8] - V3UnitS1
+# CHECK-NEXT: [9] - V3UnitS2
+# CHECK-NEXT: [10] - V3UnitS3
+# CHECK-NEXT: [11] - V3UnitS4
+# CHECK-NEXT: [12] - V3UnitS5
+# CHECK-NEXT: [13] - V3UnitST1
+# CHECK-NEXT: [14] - V3UnitV0
+# CHECK-NEXT: [15] - V3UnitV1
+# CHECK-NEXT: [16] - V3UnitV2
+# CHECK-NEXT: [17] - V3UnitV3
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] [17]
+# CHECK-NEXT: - - - - - - - - - 27.33 27.33 48.83 14.88 3.88 3.88 3.88 3.88 3.88 3.88 3.88 21.50 525.00 340.50 478.50 254.00
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] [17] Instructions:
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 abs d29, d24
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 abs v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 abs v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 abs v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 abs v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 abs v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 abs v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 abs v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add d17, d31, d29
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 addhn v0.2s, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 addhn v0.4h, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 addhn v0.8b, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 addhn2 v0.16b, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 addhn2 v0.4s, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 addhn2 v0.8h, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 addp v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 addp v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 addv s0, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 addv h0, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.75 0.25 0.75 addv h0, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.75 0.25 0.75 addv b0, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 addv b0, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 aesd v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 aese v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 aesimc v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 aesmc v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 and v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - bfcvt h0, s0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - bfcvtn v0.4h, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - bfcvtn2 v0.8h, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 bfdot v0.2s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 bfdot v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 bfmlalb v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 bfmlalb v0.4s, v0.8h, v0.h[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 bfmlalt v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 bfmlalt v0.4s, v0.8h, v0.h[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 bfmmla v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 bic v0.4h, #15, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 bic v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 bif v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 bit v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 bsl v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cls v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cls v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cls v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cls v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cls v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cls v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 clz v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 clz v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 clz v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 clz v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 clz v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 clz v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cmeq d20, d21, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cmeq d20, d21, d22
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cmeq v0.16b, v0.16b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cmeq v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cmge d20, d21, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cmge d20, d21, d22
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cmge v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cmge v0.8b, v0.8b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cmgt d20, d21, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cmgt d20, d21, d22
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cmgt v0.2s, v0.2s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cmgt v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cmhi d20, d21, d22
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cmhi v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cmhs d20, d21, d22
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cmhs v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cmle d20, d21, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cmle v0.2d, v0.2d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cmlt d20, d21, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cmlt v0.8h, v0.8h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cmtst d20, d21, d22
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cmtst v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cnt v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cnt v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - dup v0.16b, w28
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - dup v0.2d, x28
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - dup v0.2s, w28
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - dup v0.4h, w28
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - dup v0.4s, w28
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - dup v0.8b, w28
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - dup v0.8h, w28
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov b0, v0.b[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov d0, v0.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov h0, v0.h[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov s0, v0.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 dup v0.16b, v0.b[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 dup v0.2d, v0.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 dup v0.2s, v0.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 dup v0.4h, v0.h[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 dup v0.4s, v0.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 dup v0.8b, v0.b[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 dup v0.8h, v0.h[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 eor v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ext v0.16b, v0.16b, v0.16b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ext v0.8b, v0.8b, v0.8b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fabd d29, d24, d20
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fabd s29, s24, s20
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fabd v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fabs v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fabs v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fabs v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fabs v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fabs v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 facge d20, d21, d22
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 facge s10, s11, s12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 facge v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 facgt d20, d21, d22
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 facgt s10, s11, s12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 facgt v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fadd v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 faddp v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 faddp v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcadd v0.2s, v0.2s, v0.2s, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcadd v0.4s, v0.4s, v0.4s, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmeq d20, d21, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmeq d20, d21, d22
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmeq s10, s11, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmeq s10, s11, s12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmeq v0.2s, v0.2s, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmeq v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmge d20, d21, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmge d20, d21, d22
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmge s10, s11, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmge s10, s11, s12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmge v0.2d, v0.2d, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmge v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmgt d20, d21, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmgt d20, d21, d22
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmgt s10, s11, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmgt s10, s11, s12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmgt v0.4s, v0.4s, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmgt v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmla v0.2s, v0.2s, v0.2s, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmla v0.4s, v0.4s, v0.s[1], #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmle d20, d21, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmle s10, s11, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmle v0.2d, v0.2d, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmlt d20, d21, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmlt s10, s11, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmlt v0.4s, v0.4s, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtas d21, d14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtas s12, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - fcvtas h12, h13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtas v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtas v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtas v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtas v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - fcvtas v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtau d21, d14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtau s12, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - fcvtau h12, h13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtau v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtau v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtau v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtau v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - fcvtau v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtl v0.2d, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtl v0.4s, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtl2 v0.2d, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtl2 v0.4s, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtms d21, d14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtms s22, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - fcvtms h22, h13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtms v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtms v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtms v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtms v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - fcvtms v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtmu d21, d14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtmu s12, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - fcvtmu h12, h13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtmu v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtmu v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtmu v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtmu v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - fcvtmu v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtn v0.2s, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtn v0.4h, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtn2 v0.4s, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtn2 v0.8h, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtns d21, d14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtns s22, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - fcvtns h22, h13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtns v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtns v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtns v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtns v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - fcvtns v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtnu d21, d14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtnu s12, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - fcvtnu h12, h13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtnu v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtnu v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtnu v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtnu v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - fcvtnu v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtps d21, d14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtps s22, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - fcvtps h22, h13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtps v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtps v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtps v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtps v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - fcvtps v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtpu d21, d14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtpu s12, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - fcvtpu h12, h13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtpu v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtpu v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtpu v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtpu v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - fcvtpu v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtxn s22, d13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtxn v0.2s, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtxn2 v0.4s, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzs d21, d12, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzs d21, d14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtzs s12, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtzs s21, s12, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - fcvtzs h21, h14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - fcvtzs h21, h12, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzs v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzs v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzs v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzs v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtzs v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtzs v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtzs v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - fcvtzs v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu d21, d12, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu d21, d14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtzu s12, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtzu s21, s12, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - fcvtzu h12, h13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - fcvtzu h21, h12, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtzu v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtzu v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtzu v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - fcvtzu v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 - - fdiv v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 - - fdiv v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 4.00 - - fdiv v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 4.00 - - fdiv v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 8.00 - - fdiv v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmax v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmax v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmax v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmaxnm v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmaxnm v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmaxnm v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmaxnmp v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmaxnmp v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmaxnmp v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmaxp v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmaxp v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmaxp v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmaxv h0, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.75 0.75 0.75 0.75 fmaxv h0, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmaxv s0, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmin v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmin v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmin v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fminnm v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fminnm v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fminnm v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fminnmp v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fminnmp v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fminnmp v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fminp v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fminp v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fminp v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmla d0, d1, v0.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmla s0, s1, v0.s[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmla v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmlal v0.2s, v0.2h, v0.h[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmlal v0.4s, v0.4h, v0.h[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmlal v0.2s, v0.2h, v0.2h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmlal v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmlal2 v0.2s, v0.2h, v0.h[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmlal2 v0.4s, v0.4h, v0.h[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmlal2 v0.2s, v0.2h, v0.2h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmlal2 v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmls d0, d4, v0.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmls s3, s5, v0.s[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmls v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmlsl v0.2s, v0.2h, v0.h[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmlsl v0.4s, v0.4h, v0.h[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmlsl v0.2s, v0.2h, v0.2h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmlsl v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmlsl2 v0.2s, v0.2h, v0.h[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmlsl2 v0.4s, v0.4h, v0.h[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmlsl2 v0.2s, v0.2h, v0.2h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmlsl2 v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmov v0.2d, #-1.25000000
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmov v0.2s, #13.00000000
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmov v0.4s, #1.00000000
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmul d0, d1, v0.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmul s0, s1, v0.s[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmul v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmulx d0, d4, v0.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmulx d23, d11, d1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmulx s20, s22, s15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmulx s3, s5, v0.s[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmulx v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmulx v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmulx v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fneg v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fneg v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fneg v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fneg v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fneg v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frecpe d13, d13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frecpe s19, s14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 frecpe v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frecpe v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - frecpe v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - frecpe v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - frecpe v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 frecps v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 frecps d22, d30, d21
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 frecps s21, s16, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frecpx d16, d19
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frecpx s18, s10
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frint32x v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frint32x v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - frint32x v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frint32z v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frint32z v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - frint32z v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frint64x v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frint64x v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - frint64x v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frint64z v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frint64z v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - frint64z v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frinta v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frinta v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - frinta v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - frinta v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - frinta v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frinti v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frinti v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - frinti v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - frinti v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - frinti v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frintm v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frintm v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - frintm v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - frintm v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - frintm v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frintn v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frintn v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - frintn v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - frintn v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - frintn v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frintp v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frintp v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - frintp v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - frintp v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - frintp v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frintx v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frintx v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - frintx v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - frintx v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - frintx v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frintz v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frintz v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - frintz v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - frintz v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - frintz v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frsqrte d21, d12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frsqrte s22, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 frsqrte v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frsqrte v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - frsqrte v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - frsqrte v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - frsqrte v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 frsqrts d8, d22, d18
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 frsqrts s21, s5, s12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 frsqrts v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 - - fsqrt v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 - - fsqrt v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 4.00 - - fsqrt v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 4.00 - - fsqrt v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 8.00 - - fsqrt v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fsub v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1 { v0.16b }, [x0]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ld1 { v0.2d, v1.2d, v2.2d }, [x0], #48
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 - - - - - - - - - - - - - ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ld1 { v0.4s, v1.4s }, [sp], #32
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 - - - - - - - - - - - - - ld1 { v0.4s, v1.4s, v2.4s }, [sp]
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ld1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ld1 { v0.8h }, [x15], x2
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - - - - - ld1 { v0.8h, v1.8h }, [x15]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.25 0.25 0.25 0.25 ld1 { v0.b }[9], [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - 0.25 0.25 0.25 0.25 ld1 { v0.b }[9], [x0], #1
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.25 0.25 0.25 0.25 ld1r { v0.16b }, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - 0.25 0.25 0.25 0.25 ld1r { v0.16b }, [x0], #1
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.25 0.25 0.25 0.25 ld1r { v0.8h }, [x15]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - 0.25 0.25 0.25 0.25 ld1r { v0.8h }, [x15], #2
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - 0.50 0.50 0.50 0.50 ld2 { v0.16b, v1.16b }, [x0], x1
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ld2 { v0.8b, v1.8b }, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ld2 { v0.h, v1.h }[7], [x15]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - 0.50 0.50 0.50 0.50 ld2 { v0.h, v1.h }[7], [x15], #4
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ld2r { v0.2d, v1.2d }, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - 0.50 0.50 0.50 0.50 ld2r { v0.2d, v1.2d }, [x0], #16
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ld2r { v0.4s, v1.4s }, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - 0.50 0.50 0.50 0.50 ld2r { v0.4s, v1.4s }, [sp], #8
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 0.75 0.75 0.75 0.75 ld3 { v0.4h, v1.4h, v2.4h }, [x15]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - 0.75 0.75 0.75 0.75 ld3 { v0.8h, v1.8h, v2.8h }, [x15], x2
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 0.75 0.75 0.75 0.75 ld3 { v0.s, v1.s, v2.s }[3], [sp]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - 0.75 0.75 0.75 0.75 ld3 { v0.s, v1.s, v2.s }[3], [sp], x3
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 0.75 0.75 0.75 0.75 ld3r { v0.4h, v1.4h, v2.4h }, [x15]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - 0.75 0.75 0.75 0.75 ld3r { v0.4h, v1.4h, v2.4h }, [x15], #6
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 0.75 0.75 0.75 0.75 ld3r { v0.8b, v1.8b, v2.8b }, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - 0.75 0.75 0.75 0.75 ld3r { v0.8b, v1.8b, v2.8b }, [x0], #3
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 - - - - - - - - - 1.00 1.00 1.00 1.00 ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
+# CHECK-NEXT: - - - - - - - - - 2.00 2.00 2.00 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - 1.00 1.00 1.00 1.00 ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 - - - - - - - - - 1.00 1.00 1.00 1.00 ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - 1.00 1.00 1.00 1.00 ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0], #32
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - 1.00 1.00 1.00 1.00 ld4 { v0.h, v1.h, v2.h, v3.h }[7], [x0], x0
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 - - - - - - - - - 1.00 1.00 1.00 1.00 ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [sp]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - 1.00 1.00 1.00 1.00 ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [sp], x7
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 - - - - - - - - - 1.00 1.00 1.00 1.00 ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - 1.00 1.00 1.00 1.00 ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x30
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - mla v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - mls v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov b0, v0.b[15]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov d6, v0.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov h2, v0.h[5]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov s17, v0.s[2]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov v2.b[0], v0.b[0]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov v2.h[1], v0.h[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov v2.s[2], v0.s[2]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov v2.d[1], v0.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.25 0.25 0.25 0.25 mov v0.b[0], w8
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.25 0.25 0.25 0.25 mov v0.h[1], w8
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.25 0.25 0.25 0.25 mov v0.s[2], w8
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.25 0.25 0.25 0.25 mov v0.d[1], x8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 movi d15, #0xff00ff00ff00ff
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 movi v0.16b, #31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 movi v0.2d, #0xff0000ff0000ffff
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 movi v0.2s, #8, msl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 movi v0.4s, #255, lsl #24
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 movi v0.8b, #255
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - mul v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mvni v0.2s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mvni v0.4s, #16, msl #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 neg d29, d24
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 neg v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 neg v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 neg v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 neg v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 neg v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 neg v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 neg v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mvn v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mvn v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 orn v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 orr v0.8h, #31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 pmul v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 pmul v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 pmull v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 pmull2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 raddhn v0.2s, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 raddhn v0.4h, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 raddhn v0.8b, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 raddhn2 v0.16b, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 raddhn2 v0.4s, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 raddhn2 v0.8h, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rbit v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rbit v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rev16 v21.8b, v1.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rev16 v30.16b, v31.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rev32 v0.4h, v9.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rev32 v21.8b, v1.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rev32 v30.16b, v31.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rev32 v4.8h, v7.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rev64 v0.16b, v31.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rev64 v1.8b, v9.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rev64 v13.4h, v21.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rev64 v2.8h, v4.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rev64 v4.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rev64 v6.4s, v8.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rshrn v0.2s, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rshrn v0.4h, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rshrn v0.8b, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rshrn2 v0.16b, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rshrn2 v0.4s, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rshrn2 v0.8h, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rsubhn v0.2s, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rsubhn v0.4h, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rsubhn v0.8b, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rsubhn2 v0.16b, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rsubhn2 v0.4s, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rsubhn2 v0.8h, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 saba v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sabal v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sabal v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sabal v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sabal2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sabal2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sabal2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sabd v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sabdl v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sabdl v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sabdl v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sabdl2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sabdl2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sabdl2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sadalp v0.1d, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sadalp v0.2d, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sadalp v0.2s, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sadalp v0.4h, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sadalp v0.4s, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sadalp v0.8h, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 saddl v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 saddl v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 saddl v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 saddl2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 saddl2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 saddl2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 saddlp v0.1d, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 saddlp v0.2d, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 saddlp v0.2s, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 saddlp v0.4h, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 saddlp v0.4s, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 saddlp v0.8h, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 saddlv d0, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 saddlv s0, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.75 0.25 0.75 saddlv s0, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.75 0.25 0.75 saddlv h0, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 saddlv h0, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 saddw v0.2d, v0.2d, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 saddw v0.4s, v0.4s, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 saddw v0.8h, v0.8h, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 saddw2 v0.2d, v0.2d, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 saddw2 v0.4s, v0.4s, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 saddw2 v0.8h, v0.8h, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - scvtf d21, d12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - scvtf d21, d12, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - scvtf s22, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - scvtf s22, s13, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - scvtf v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - scvtf v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - scvtf v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - scvtf v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - scvtf v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - scvtf v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - scvtf v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - scvtf v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sdot v0.2s, v0.8b, v0.4b[2]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sdot v0.2s, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sdot v0.4s, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sdot v0.4s, v0.16b, v0.4b[2]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shadd v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shl d7, d10, #12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shl v0.16b, v0.16b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shl v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shl v0.4h, v0.4h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shl v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shll v0.2d, v0.2s, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shll v0.4s, v0.4h, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shll v0.8h, v0.8b, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shll v0.2d, v0.2s, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shll v0.4s, v0.4h, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shll v0.8h, v0.8b, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shll2 v0.2d, v0.4s, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shll2 v0.4s, v0.8h, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shll2 v0.8h, v0.16b, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shll2 v0.2d, v0.4s, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shll2 v0.4s, v0.8h, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shll2 v0.8h, v0.16b, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shrn v0.2s, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shrn v0.4h, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shrn v0.8b, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shrn2 v0.16b, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shrn2 v0.4s, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shrn2 v0.8h, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shsub v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shsub v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sli d10, d14, #12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sli v0.16b, v0.16b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sli v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sli v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sli v0.4h, v0.4h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sli v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sli v0.8b, v0.8b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sli v0.8h, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smax v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smax v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smax v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smaxp v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smaxp v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smaxp v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.75 0.25 0.75 smaxv b0, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 smaxv b0, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 smaxv h0, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.75 0.25 0.75 smaxv h0, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 smaxv s0, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smin v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smin v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smin v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sminp v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sminp v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sminp v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.75 0.25 0.75 sminv b0, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 sminv b0, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 sminv h0, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.75 0.25 0.75 sminv h0, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 sminv s0, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smlal v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smlal v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smlal v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smlal2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smlal2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smlal2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smlsl v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smlsl v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smlsl v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smlsl2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smlsl2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smlsl2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smmla v0.4s, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smull v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smull v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smull v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smull2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smull2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smull2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqabs b19, b14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqabs d18, d12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqabs h21, h15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqabs s20, s12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqabs v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqabs v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqabs v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqabs v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqabs v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqabs v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqabs v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqadd b20, b11, b15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqadd v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqadd v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmlal d19, s24, s12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmlal d8, s9, v0.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmlal s0, h0, v0.h[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmlal s17, h27, h12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmlal v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmlal v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmlal2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmlal2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmlsl d12, s23, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmlsl d8, s9, v0.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmlsl s0, h0, v0.h[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmlsl s14, h12, h25
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmlsl v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmlsl v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmlsl2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmlsl2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmulh h10, h11, h12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmulh h7, h15, v0.h[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmulh s15, s14, v0.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmulh s20, s21, s2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmulh v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmulh v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmull d1, s1, v0.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmull d15, s22, s12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmull s1, h1, v0.h[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmull s12, h22, h12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmull v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmull v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmull2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmull2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqneg b19, b14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqneg d18, d12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqneg h21, h15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqneg s20, s12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqneg v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqneg v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqneg v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqneg v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqneg v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqneg v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqneg v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - sqrdmlah h0, h1, v2.h[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - sqrdmlah v0.4h, v1.4h, v2.h[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - sqrdmlah v0.8h, v1.8h, v2.h[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - sqrdmlah s0, s1, v2.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - sqrdmlah v0.2s, v1.2s, v2.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - sqrdmlah v0.4s, v1.4s, v2.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - sqrdmlah h0, h1, h2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - sqrdmlah v0.4h, v1.4h, v2.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - sqrdmlah v0.8h, v1.8h, v2.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - sqrdmlah s0, s1, s2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - sqrdmlah v0.2s, v1.2s, v2.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - sqrdmlah v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - sqrdmlsh h0, h1, v2.h[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - sqrdmlsh v0.4h, v1.4h, v2.h[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - sqrdmlsh v0.8h, v1.8h, v2.h[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - sqrdmlsh s0, s1, v2.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - sqrdmlsh v0.2s, v1.2s, v2.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - sqrdmlsh v0.4s, v1.4s, v2.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - sqrdmlsh h0, h1, h2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - sqrdmlsh v0.4h, v1.4h, v2.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - sqrdmlsh v0.8h, v1.8h, v2.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - sqrdmlsh s0, s1, s2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - sqrdmlsh v0.2s, v1.2s, v2.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - sqrdmlsh v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqrdmulh h10, h11, h12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqrdmulh h7, h15, v0.h[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqrdmulh s15, s14, v0.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqrdmulh s20, s21, s2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqrdmulh v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqrdmulh v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshl d31, d31, d31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshl h3, h4, h15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshl v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshl v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshl v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrn b10, h13, #2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrn h15, s10, #6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrn s15, d12, #9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrn v0.2s, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrn v0.4h, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrn v0.8b, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrn2 v0.16b, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrn2 v0.4s, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrn2 v0.8h, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrun b17, h10, #6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrun h10, s13, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrun s22, d16, #31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrun v0.2s, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrun v0.4h, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrun v0.8b, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrun2 v0.16b, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrun2 v0.4s, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrun2 v0.8h, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshl b11, b19, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshl d15, d16, #51
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshl d31, d31, d31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshl h13, h18, #11
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshl h3, h4, h15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshl s14, s17, #22
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshl v0.16b, v0.16b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshl v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshl v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshl v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshl v0.4h, v0.4h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshl v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshl v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshl v0.8b, v0.8b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshl v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshl v0.8h, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshlu b15, b18, #6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshlu d11, d13, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshlu h19, h17, #6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshlu s16, s14, #25
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshlu v0.16b, v0.16b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshlu v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshlu v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshlu v0.4h, v0.4h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshlu v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshlu v0.8b, v0.8b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshlu v0.8h, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrn b10, h15, #5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrn h17, s10, #4
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrn s18, d10, #31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrn v0.2s, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrn v0.4h, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrn v0.8b, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrn2 v0.16b, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrn2 v0.4s, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrn2 v0.8h, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrun b15, h10, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrun h20, s14, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrun s10, d15, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrun v0.2s, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrun v0.4h, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrun v0.8b, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrun2 v0.16b, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrun2 v0.4s, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrun2 v0.8h, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqsub s20, s10, s7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqsub v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqsub v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqsub v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqxtn b18, h18
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqxtn h20, s17
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqxtn s19, d14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqxtn v0.2s, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqxtn v0.4h, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqxtn v0.8b, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqxtn2 v0.16b, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqxtn2 v0.4s, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqxtn2 v0.8h, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqxtun b19, h14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqxtun h21, s15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqxtun s20, d12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqxtun v0.2s, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqxtun v0.4h, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqxtun v0.8b, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqxtun2 v0.16b, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqxtun2 v0.4s, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqxtun2 v0.8h, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srhadd v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srhadd v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srhadd v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sri d10, d12, #14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sri v0.16b, v0.16b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sri v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sri v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sri v0.4h, v0.4h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sri v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sri v0.8b, v0.8b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sri v0.8h, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srshl d16, d16, d16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srshl v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srshl v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srshl v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srshr d19, d18, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srshr v0.16b, v0.16b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srshr v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srshr v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srshr v0.4h, v0.4h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srshr v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srshr v0.8b, v0.8b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srshr v0.8h, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srsra d15, d11, #19
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srsra v0.16b, v0.16b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srsra v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srsra v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srsra v0.4h, v0.4h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srsra v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srsra v0.8b, v0.8b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srsra v0.8h, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sshl d31, d31, d31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sshl v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sshl v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sshl v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sshl v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sshll v0.2d, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sshll2 v0.4s, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sshr d15, d16, #12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sshr v0.16b, v0.16b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sshr v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sshr v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sshr v0.4h, v0.4h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sshr v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sshr v0.8b, v0.8b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sshr v0.8h, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ssra d18, d12, #21
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ssra v0.16b, v0.16b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ssra v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ssra v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ssra v0.4h, v0.4h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ssra v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ssra v0.8b, v0.8b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ssra v0.8h, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ssubl v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ssubl v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ssubl v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ssubl2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ssubl2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ssubl2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ssubw v0.2d, v0.2d, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ssubw v0.4s, v0.4s, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ssubw v0.8h, v0.8h, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ssubw2 v0.2d, v0.2d, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ssubw2 v0.4s, v0.4s, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ssubw2 v0.8h, v0.8h, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st1 { v0.16b }, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 1.50 1.50 1.50 - - st1 { v0.2d, v1.2d, v2.2d }, [x0], #48
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - - - - - 2.00 2.00 2.00 - - st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 1.00 1.00 1.00 - - st1 { v0.4s, v1.4s }, [sp], #32
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 - - st1 { v0.4s, v1.4s, v2.4s }, [sp]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 1.00 1.00 1.00 - - st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 0.50 0.50 - - st1 { v0.8h }, [x15], x2
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 - - st1 { v0.8h, v1.8h }, [x15]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 1.00 1.00 - - st1 { v0.d }[1], [x0]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 1.00 1.00 - - st1 { v0.d }[1], [x0], #8
+# CHECK-NEXT: - - - - - - - - - - - 1.00 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 1.00 2.00 2.00 - - st2 { v0.16b, v1.16b }, [x0], x1
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 1.00 1.00 - - st2 { v0.8b, v1.8b }, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 1.00 1.00 - - st2 { v0.s, v1.s }[3], [sp]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 1.00 1.00 - - st2 { v0.s, v1.s }[3], [sp], #8
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 2.00 2.00 - - st3 { v0.4h, v1.4h, v2.4h }, [x15]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 1.50 3.00 3.00 - - st3 { v0.8h, v1.8h, v2.8h }, [x15], x2
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 2.00 2.00 - - st3 { v0.h, v1.h, v2.h }[7], [x15]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 1.00 2.00 2.00 - - st3 { v0.h, v1.h, v2.h }[7], [x15], #6
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 3.00 3.00 - - st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
+# CHECK-NEXT: - - - - - - - - - - - 2.00 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 2.00 6.00 6.00 - - st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 1.50 1.50 - - st4 { v0.b, v1.b, v2.b, v3.b }[9], [x0]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 1.00 2.00 2.00 - - st4 { v0.d, v1.d, v2.d, v3.d }[1], [x0], x5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub d15, d5, d16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sudot v0.2s, v0.8b, v0.4b[2]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sudot v0.4s, v0.16b, v0.4b[2]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 suqadd b19, b14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 suqadd d18, d22
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 suqadd h20, h15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 suqadd s21, s12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 suqadd v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 suqadd v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 suqadd v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 suqadd v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 suqadd v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 suqadd v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 suqadd v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 tbl v0.16b, { v0.16b }, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 tbl v0.16b, { v0.16b, v1.16b }, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 tbl v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.75 0.75 0.75 0.75 tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 tbl v0.8b, { v0.16b }, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 tbl v0.8b, { v0.16b, v1.16b }, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 tbl v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.75 0.75 0.75 0.75 tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 tbx v0.16b, { v0.16b }, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 tbx v0.16b, { v0.16b, v1.16b }, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.75 0.75 0.75 0.75 tbx v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.25 1.25 1.25 1.25 tbx v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 tbx v0.8b, { v0.16b }, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 tbx v0.8b, { v0.16b, v1.16b }, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.75 0.75 0.75 0.75 tbx v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.25 1.25 1.25 1.25 tbx v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 trn1 v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 trn1 v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 trn1 v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 trn1 v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 trn1 v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 trn1 v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 trn1 v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 trn2 v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 trn2 v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 trn2 v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 trn2 v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 trn2 v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 trn2 v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 trn2 v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uaba v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uabal v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uabal v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uabal v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uabal2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uabal2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uabal2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uabd v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uabdl v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uabdl v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uabdl v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uabdl2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uabdl2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uabdl2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uadalp v0.1d, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uadalp v0.2d, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uadalp v0.2s, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uadalp v0.4h, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uadalp v0.4s, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uadalp v0.8h, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uaddl v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uaddl v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uaddl v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uaddl2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uaddl2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uaddl2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uaddlp v0.1d, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uaddlp v0.2d, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uaddlp v0.2s, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uaddlp v0.4h, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uaddlp v0.4s, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uaddlp v0.8h, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 uaddlv d0, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 uaddlv s0, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.75 0.25 0.75 uaddlv s0, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.75 0.25 0.75 uaddlv h0, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 uaddlv h0, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uaddw v0.2d, v0.2d, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uaddw v0.4s, v0.4s, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uaddw v0.8h, v0.8h, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uaddw2 v0.2d, v0.2d, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uaddw2 v0.4s, v0.4s, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uaddw2 v0.8h, v0.8h, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - ucvtf d21, d14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - ucvtf d21, d14, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - ucvtf s22, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - ucvtf s22, s13, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - ucvtf v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - ucvtf v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - ucvtf v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - ucvtf v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - ucvtf v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - ucvtf v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - ucvtf v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - ucvtf v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 udot v0.2s, v0.8b, v0.4b[2]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 udot v0.2s, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 udot v0.4s, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 udot v0.4s, v0.16b, v0.4b[2]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uhadd v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uhadd v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uhsub v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 umax v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 umax v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 umax v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 umaxp v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 umaxp v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 umaxp v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.75 0.25 0.75 umaxv b0, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 umaxv b0, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 umaxv h0, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.75 0.25 0.75 umaxv h0, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 umaxv s0, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 umin v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 umin v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 umin v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uminp v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uminp v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uminp v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.75 0.25 0.75 uminv b0, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 uminv b0, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 uminv h0, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.75 0.25 0.75 uminv h0, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 uminv s0, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umlal v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umlal v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umlal v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umlal2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umlal2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umlal2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umlsl v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umlsl v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umlsl v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umlsl2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umlsl2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umlsl2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ummla v0.4s, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - umov w0, v0.b[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - umov w0, v0.h[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - mov w0, v0.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - mov x0, v0.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umull v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umull v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umull v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umull2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umull2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umull2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqadd h0, h1, h5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqadd v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqrshl b11, b20, b30
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqrshl s23, s20, s16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqrshl v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqrshl v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqrshl v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqrshl v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqrshrn b10, h12, #5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqrshrn h12, s10, #14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqrshrn s10, d10, #25
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqrshrn v0.2s, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqrshrn v0.4h, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqrshrn v0.8b, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqrshrn2 v0.16b, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqrshrn2 v0.4s, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqrshrn2 v0.8h, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshl b11, b20, b30
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshl b18, b15, #6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshl d15, d12, #19
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshl h11, h18, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshl s14, s19, #18
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshl s23, s20, s16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshl v0.16b, v0.16b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshl v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshl v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshl v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshl v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshl v0.4h, v0.4h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshl v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshl v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshl v0.8b, v0.8b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshl v0.8h, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshl v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshrn b12, h10, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshrn h10, s14, #5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshrn s10, d12, #13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshrn v0.2s, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshrn v0.4h, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshrn v0.8b, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshrn2 v0.16b, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshrn2 v0.4s, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshrn2 v0.8h, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqsub d16, d16, d16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqsub v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqxtn b18, h18
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqxtn h20, s17
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqxtn s19, d14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqxtn v0.2s, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqxtn v0.4h, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqxtn v0.8b, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqxtn2 v0.16b, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqxtn2 v0.4s, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqxtn2 v0.8h, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - urecpe v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - urecpe v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 urhadd v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 urhadd v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 urhadd v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 urshl d8, d7, d4
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 urshl v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 urshl v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 urshl v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 urshl v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 urshr d20, d23, #31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 urshr v0.16b, v0.16b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 urshr v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 urshr v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 urshr v0.4h, v0.4h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 urshr v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 urshr v0.8b, v0.8b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 urshr v0.8h, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - ursqrte v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - ursqrte v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ursra d18, d10, #13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ursra v0.16b, v0.16b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ursra v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ursra v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ursra v0.4h, v0.4h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ursra v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ursra v0.8b, v0.8b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ursra v0.8h, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usdot v0.2s, v0.8b, v0.4b[2]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usdot v0.2s, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usdot v0.4s, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usdot v0.4s, v0.16b, v0.4b[2]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ushl d0, d0, d0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ushl v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ushl v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ushl v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ushll v0.4s, v0.4h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ushll2 v0.8h, v0.16b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ushr d10, d17, #18
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ushr v0.16b, v0.16b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ushr v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ushr v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ushr v0.4h, v0.4h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ushr v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ushr v0.8b, v0.8b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ushr v0.8h, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usmmla v0.4s, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - smov w0, v0.b[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - smov w0, v0.h[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - smov x0, v0.b[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - smov x0, v0.h[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 - - smov x0, v0.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usqadd b19, b14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usqadd d18, d22
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usqadd h20, h15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usqadd s21, s12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usqadd v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usqadd v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usqadd v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usqadd v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usqadd v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usqadd v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usqadd v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usra d20, d13, #61
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usra v0.16b, v0.16b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usra v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usra v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usra v0.4h, v0.4h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usra v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usra v0.8b, v0.8b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usra v0.8h, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usubl v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usubl v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usubl v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usubl2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usubl2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usubl2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usubw v0.2d, v0.2d, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usubw v0.4s, v0.4s, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usubw v0.8h, v0.8h, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usubw2 v0.2d, v0.2d, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usubw2 v0.4s, v0.4s, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usubw2 v0.8h, v0.8h, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uzp1 v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uzp1 v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uzp1 v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uzp1 v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uzp1 v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uzp1 v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uzp1 v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uzp2 v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uzp2 v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uzp2 v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uzp2 v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uzp2 v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uzp2 v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uzp2 v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 xtn v0.2s, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 xtn v0.4h, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 xtn v0.8b, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 xtn2 v0.16b, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 xtn2 v0.4s, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 xtn2 v0.8h, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 zip1 v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 zip1 v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 zip1 v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 zip1 v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 zip1 v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 zip1 v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 zip1 v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 zip2 v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 zip2 v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 zip2 v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 zip2 v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 zip2 v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 zip2 v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 zip2 v0.8h, v0.8h, v0.8h
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-sve-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-sve-instructions.s
new file mode 100644
index 0000000..5ca5765
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-sve-instructions.s
@@ -0,0 +1,10289 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=neoverse-v3 -mattr=+sve2-aes,+sve2-sha3,+sve2-sm4 -instruction-tables < %s | FileCheck %s
+
+abs z0.b, p0/m, z0.b
+abs z0.d, p0/m, z0.d
+abs z0.h, p0/m, z0.h
+abs z0.s, p0/m, z0.s
+abs z31.b, p7/m, z31.b
+abs z31.d, p7/m, z31.d
+abs z31.h, p7/m, z31.h
+abs z31.s, p7/m, z31.s
+adclb z0.d, z1.d, z31.d
+adclb z0.s, z1.s, z31.s
+adclt z0.d, z1.d, z31.d
+adclt z0.s, z1.s, z31.s
+add z0.b, p0/m, z0.b, z0.b
+add z0.b, z0.b, #0
+add z0.b, z0.b, z0.b
+add z0.d, p0/m, z0.d, z0.d
+add z0.d, z0.d, #0
+add z0.d, z0.d, #0, lsl #8
+add z0.d, z0.d, z0.d
+add z0.h, p0/m, z0.h, z0.h
+add z0.h, z0.h, #0
+add z0.h, z0.h, #0, lsl #8
+add z0.h, z0.h, z0.h
+add z0.s, p0/m, z0.s, z0.s
+add z0.s, z0.s, #0
+add z0.s, z0.s, #0, lsl #8
+add z0.s, z0.s, z0.s
+add z0.s, z1.s, z2.s
+add z21.b, p5/m, z21.b, z10.b
+add z21.b, z10.b, z21.b
+add z21.d, p5/m, z21.d, z10.d
+add z21.d, z10.d, z21.d
+add z21.h, p5/m, z21.h, z10.h
+add z21.h, z10.h, z21.h
+add z21.s, p5/m, z21.s, z10.s
+add z21.s, z10.s, z21.s
+add z23.b, p3/m, z23.b, z13.b
+add z23.b, z13.b, z8.b
+add z23.d, p3/m, z23.d, z13.d
+add z23.d, z13.d, z8.d
+add z23.h, p3/m, z23.h, z13.h
+add z23.h, z13.h, z8.h
+add z23.s, p3/m, z23.s, z13.s
+add z23.s, z13.s, z8.s
+add z31.b, p7/m, z31.b, z31.b
+add z31.b, z31.b, #255
+add z31.b, z31.b, z31.b
+add z31.d, p7/m, z31.d, z31.d
+add z31.d, z31.d, #65280
+add z31.d, z31.d, z31.d
+add z31.h, p7/m, z31.h, z31.h
+add z31.h, z31.h, #65280
+add z31.h, z31.h, z31.h
+add z31.s, p7/m, z31.s, z31.s
+add z31.s, z31.s, #65280
+add z31.s, z31.s, z31.s
+addhnb z0.b, z1.h, z31.h
+addhnb z0.h, z1.s, z31.s
+addhnb z0.s, z1.d, z31.d
+addhnt z0.b, z1.h, z31.h
+addhnt z0.h, z1.s, z31.s
+addhnt z0.s, z1.d, z31.d
+addp z0.b, p0/m, z0.b, z1.b
+addp z0.h, p0/m, z0.h, z1.h
+addp z29.s, p7/m, z29.s, z30.s
+addp z31.d, p7/m, z31.d, z30.d
+addpl sp, sp, #31
+addpl x0, x0, #-32
+addpl x21, x21, #0
+addpl x23, x8, #-1
+addvl sp, sp, #31
+addvl x0, x0, #-32
+addvl x21, x21, #0
+addvl x23, x8, #-1
+adr z0.d, [z0.d, z0.d, lsl #1]
+adr z0.d, [z0.d, z0.d, lsl #2]
+adr z0.d, [z0.d, z0.d, lsl #3]
+adr z0.d, [z0.d, z0.d, sxtw #1]
+adr z0.d, [z0.d, z0.d, sxtw #2]
+adr z0.d, [z0.d, z0.d, sxtw #3]
+adr z0.d, [z0.d, z0.d, sxtw]
+adr z0.d, [z0.d, z0.d, uxtw #1]
+adr z0.d, [z0.d, z0.d, uxtw #2]
+adr z0.d, [z0.d, z0.d, uxtw #3]
+adr z0.d, [z0.d, z0.d, uxtw]
+adr z0.d, [z0.d, z0.d]
+adr z0.s, [z0.s, z0.s, lsl #1]
+adr z0.s, [z0.s, z0.s, lsl #2]
+adr z0.s, [z0.s, z0.s, lsl #3]
+adr z0.s, [z0.s, z0.s]
+aesd z0.b, z0.b, z31.b
+aese z0.b, z0.b, z31.b
+aesimc z0.b, z0.b
+aesimc z31.b, z31.b
+aesmc z0.b, z0.b
+aesmc z31.b, z31.b
+and p0.b, p0/z, p0.b, p1.b
+and z0.d, z0.d, #0x6
+and z0.d, z0.d, #0xfffffffffffffff9
+and z0.d, z0.d, z0.d
+and z0.s, z0.s, #0x6
+and z0.s, z0.s, #0xfffffff9
+and z23.d, z13.d, z8.d
+and z23.h, z23.h, #0x6
+and z23.h, z23.h, #0xfff9
+and z31.b, p7/m, z31.b, z31.b
+and z31.d, p7/m, z31.d, z31.d
+and z31.h, p7/m, z31.h, z31.h
+and z31.s, p7/m, z31.s, z31.s
+and z5.b, z5.b, #0x6
+and z5.b, z5.b, #0xf9
+ands p0.b, p0/z, p0.b, p1.b
+andv b0, p7, z31.b
+andv d0, p7, z31.d
+andv h0, p7, z31.h
+andv s0, p7, z31.s
+asr z0.b, p0/m, z0.b, #1
+asr z0.b, p0/m, z0.b, z0.b
+asr z0.b, p0/m, z0.b, z1.d
+asr z0.b, z0.b, #1
+asr z0.b, z1.b, z2.d
+asr z0.d, p0/m, z0.d, #1
+asr z0.d, p0/m, z0.d, z0.d
+asr z0.d, z0.d, #1
+asr z0.h, p0/m, z0.h, #1
+asr z0.h, p0/m, z0.h, z0.h
+asr z0.h, p0/m, z0.h, z1.d
+asr z0.h, z0.h, #1
+asr z0.h, z1.h, z2.d
+asr z0.s, p0/m, z0.s, #1
+asr z0.s, p0/m, z0.s, z0.s
+asr z0.s, p0/m, z0.s, z1.d
+asr z0.s, z0.s, #1
+asr z0.s, z1.s, z2.d
+asr z31.b, p0/m, z31.b, #8
+asr z31.b, z31.b, #8
+asr z31.d, p0/m, z31.d, #64
+asr z31.d, z31.d, #64
+asr z31.h, p0/m, z31.h, #16
+asr z31.h, z31.h, #16
+asr z31.s, p0/m, z31.s, #32
+asr z31.s, z31.s, #32
+asrd z0.b, p0/m, z0.b, #1
+asrd z0.d, p0/m, z0.d, #1
+asrd z0.h, p0/m, z0.h, #1
+asrd z0.s, p0/m, z0.s, #1
+asrd z31.b, p0/m, z31.b, #8
+asrd z31.d, p0/m, z31.d, #64
+asrd z31.h, p0/m, z31.h, #16
+asrd z31.s, p0/m, z31.s, #32
+asrr z0.b, p0/m, z0.b, z0.b
+asrr z0.d, p0/m, z0.d, z0.d
+asrr z0.h, p0/m, z0.h, z0.h
+asrr z0.s, p0/m, z0.s, z0.s
+bcax z29.d, z29.d, z30.d, z31.d
+bdep z0.b, z1.b, z31.b
+bdep z0.d, z1.d, z31.d
+bdep z0.h, z1.h, z31.h
+bdep z0.s, z1.s, z31.s
+bext z0.b, z1.b, z31.b
+bext z0.d, z1.d, z31.d
+bext z0.h, z1.h, z31.h
+bext z0.s, z1.s, z31.s
+bfcvt z0.h, p0/m, z1.s
+bfcvtnt z0.h, p0/m, z1.s
+bfdot z0.s, z1.h, z2.h
+bfdot z0.s, z1.h, z2.h[0]
+bfdot z0.s, z1.h, z2.h[3]
+bfmlalb z0.s, z1.h, z2.h
+bfmlalb z0.s, z1.h, z2.h[0]
+bfmlalb z0.s, z1.h, z2.h[7]
+bfmlalb z10.s, z21.h, z14.h
+bfmlalb z21.s, z14.h, z3.h[2]
+bfmlalt z0.s, z1.h, z2.h
+bfmlalt z0.s, z1.h, z2.h[0]
+bfmlalt z0.s, z1.h, z2.h[7]
+bfmlalt z0.s, z1.h, z7.h[7]
+bfmlalt z14.s, z10.h, z21.h
+bfmmla z0.s, z1.h, z2.h
+bgrp z0.b, z1.b, z31.b
+bgrp z0.d, z1.d, z31.d
+bgrp z0.h, z1.h, z31.h
+bgrp z0.s, z1.s, z31.s
+bic p0.b, p0/z, p0.b, p0.b
+bic p15.b, p15/z, p15.b, p15.b
+bic z0.d, z0.d, z0.d
+bic z23.d, z13.d, z8.d
+bic z31.b, p7/m, z31.b, z31.b
+bic z31.d, p7/m, z31.d, z31.d
+bic z31.h, p7/m, z31.h, z31.h
+bic z31.s, p7/m, z31.s, z31.s
+bics p0.b, p0/z, p0.b, p0.b
+bics p15.b, p15/z, p15.b, p15.b
+brka p0.b, p15/m, p15.b
+brka p0.b, p15/z, p15.b
+brkas p0.b, p15/z, p15.b
+brkb p0.b, p15/m, p15.b
+brkb p0.b, p15/z, p15.b
+brkbs p0.b, p15/z, p15.b
+brkn p0.b, p15/z, p1.b, p0.b
+brkn p15.b, p15/z, p15.b, p15.b
+brkns p0.b, p15/z, p1.b, p0.b
+brkns p15.b, p15/z, p15.b, p15.b
+brkpa p0.b, p15/z, p1.b, p2.b
+brkpa p15.b, p15/z, p15.b, p15.b
+brkpas p0.b, p15/z, p1.b, p2.b
+brkpas p15.b, p15/z, p15.b, p15.b
+brkpb p0.b, p15/z, p1.b, p2.b
+brkpb p15.b, p15/z, p15.b, p15.b
+brkpbs p0.b, p15/z, p1.b, p2.b
+brkpbs p15.b, p15/z, p15.b, p15.b
+bsl z0.d, z0.d, z1.d, z2.d
+bsl1n z0.d, z0.d, z1.d, z2.d
+bsl2n z0.d, z0.d, z1.d, z2.d
+cadd z0.b, z0.b, z0.b, #90
+cadd z0.d, z0.d, z0.d, #90
+cadd z0.h, z0.h, z0.h, #90
+cadd z0.s, z0.s, z0.s, #90
+cadd z31.b, z31.b, z31.b, #270
+cadd z31.d, z31.d, z31.d, #270
+cadd z31.h, z31.h, z31.h, #270
+cadd z31.s, z31.s, z31.s, #270
+cdot z0.d, z1.h, z15.h[1], #0
+cdot z0.d, z1.h, z31.h, #0
+cdot z0.d, z1.h, z31.h, #180
+cdot z0.d, z1.h, z31.h, #270
+cdot z0.d, z1.h, z31.h, #90
+cdot z0.s, z1.b, z31.b, #0
+cdot z0.s, z1.b, z7.b[3], #0
+cdot z29.d, z30.h, z0.h[0], #180
+cdot z31.d, z30.h, z7.h[1], #270
+cdot z5.d, z6.h, z3.h[0], #90
+clasta b0, p7, b0, z31.b
+clasta d0, p7, d0, z31.d
+clasta h0, p7, h0, z31.h
+clasta s0, p7, s0, z31.s
+clasta w0, p7, w0, z31.b
+clasta w0, p7, w0, z31.h
+clasta w0, p7, w0, z31.s
+clasta x0, p7, x0, z31.d
+clasta z0.b, p7, z0.b, z31.b
+clasta z0.d, p7, z0.d, z31.d
+clasta z0.h, p7, z0.h, z31.h
+clasta z0.s, p7, z0.s, z31.s
+clastb b0, p7, b0, z31.b
+clastb d0, p7, d0, z31.d
+clastb h0, p7, h0, z31.h
+clastb s0, p7, s0, z31.s
+clastb w0, p7, w0, z31.b
+clastb w0, p7, w0, z31.h
+clastb w0, p7, w0, z31.s
+clastb x0, p7, x0, z31.d
+clastb z0.b, p7, z0.b, z31.b
+clastb z0.d, p7, z0.d, z31.d
+clastb z0.h, p7, z0.h, z31.h
+clastb z0.s, p7, z0.s, z31.s
+cls z31.b, p7/m, z31.b
+cls z31.d, p7/m, z31.d
+cls z31.h, p7/m, z31.h
+cls z31.s, p7/m, z31.s
+clz z31.b, p7/m, z31.b
+clz z31.d, p7/m, z31.d
+clz z31.h, p7/m, z31.h
+clz z31.s, p7/m, z31.s
+cmla z0.b, z1.b, z2.b, #0
+cmla z0.d, z1.d, z2.d, #0
+cmla z0.h, z1.h, z2.h, #0
+cmla z0.h, z1.h, z2.h[0], #0
+cmla z0.s, z1.s, z2.s, #0
+cmla z0.s, z1.s, z2.s[0], #0
+cmla z15.b, z16.b, z17.b, #270
+cmla z15.d, z16.d, z17.d, #270
+cmla z15.h, z16.h, z17.h, #270
+cmla z15.s, z16.s, z17.s, #270
+cmla z29.b, z30.b, z31.b, #90
+cmla z29.d, z30.d, z31.d, #90
+cmla z29.h, z30.h, z31.h, #90
+cmla z29.s, z30.s, z31.s, #90
+cmla z31.b, z31.b, z31.b, #180
+cmla z31.d, z31.d, z31.d, #180
+cmla z31.h, z30.h, z7.h[0], #180
+cmla z31.h, z31.h, z31.h, #180
+cmla z31.s, z30.s, z7.s[0], #180
+cmla z31.s, z31.s, z31.s, #180
+cmpeq p0.b, p0/z, z0.b, #-16
+cmpeq p0.b, p0/z, z0.b, #15
+cmpeq p0.b, p0/z, z0.b, z0.b
+cmpeq p0.b, p0/z, z0.b, z0.d
+cmpeq p0.d, p0/z, z0.d, #-16
+cmpeq p0.d, p0/z, z0.d, #15
+cmpeq p0.d, p0/z, z0.d, z0.d
+cmpeq p0.h, p0/z, z0.h, #-16
+cmpeq p0.h, p0/z, z0.h, #15
+cmpeq p0.h, p0/z, z0.h, z0.d
+cmpeq p0.h, p0/z, z0.h, z0.h
+cmpeq p0.s, p0/z, z0.s, #-16
+cmpeq p0.s, p0/z, z0.s, #15
+cmpeq p0.s, p0/z, z0.s, z0.d
+cmpeq p0.s, p0/z, z0.s, z0.s
+cmpge p0.b, p0/z, z0.b, #-16
+cmpge p0.b, p0/z, z0.b, #15
+cmpge p0.b, p0/z, z0.b, z0.b
+cmpge p0.b, p0/z, z0.b, z0.d
+cmpge p0.b, p0/z, z1.b, z0.b
+cmpge p0.d, p0/z, z0.d, #-16
+cmpge p0.d, p0/z, z0.d, #15
+cmpge p0.d, p0/z, z0.d, z0.d
+cmpge p0.d, p0/z, z1.d, z0.d
+cmpge p0.h, p0/z, z0.h, #-16
+cmpge p0.h, p0/z, z0.h, #15
+cmpge p0.h, p0/z, z0.h, z0.d
+cmpge p0.h, p0/z, z0.h, z0.h
+cmpge p0.h, p0/z, z1.h, z0.h
+cmpge p0.s, p0/z, z0.s, #-16
+cmpge p0.s, p0/z, z0.s, #15
+cmpge p0.s, p0/z, z0.s, z0.d
+cmpge p0.s, p0/z, z0.s, z0.s
+cmpge p0.s, p0/z, z1.s, z0.s
+cmpgt p0.b, p0/z, z0.b, #-16
+cmpgt p0.b, p0/z, z0.b, #15
+cmpgt p0.b, p0/z, z0.b, z0.b
+cmpgt p0.b, p0/z, z0.b, z0.d
+cmpgt p0.b, p0/z, z1.b, z0.b
+cmpgt p0.d, p0/z, z0.d, #-16
+cmpgt p0.d, p0/z, z0.d, #15
+cmpgt p0.d, p0/z, z0.d, z0.d
+cmpgt p0.d, p0/z, z1.d, z0.d
+cmpgt p0.h, p0/z, z0.h, #-16
+cmpgt p0.h, p0/z, z0.h, #15
+cmpgt p0.h, p0/z, z0.h, z0.d
+cmpgt p0.h, p0/z, z0.h, z0.h
+cmpgt p0.h, p0/z, z1.h, z0.h
+cmpgt p0.s, p0/z, z0.s, #-16
+cmpgt p0.s, p0/z, z0.s, #15
+cmpgt p0.s, p0/z, z0.s, z0.d
+cmpgt p0.s, p0/z, z0.s, z0.s
+cmpgt p0.s, p0/z, z1.s, z0.s
+cmphi p0.b, p0/z, z0.b, #0
+cmphi p0.b, p0/z, z0.b, #127
+cmphi p0.b, p0/z, z0.b, z0.b
+cmphi p0.b, p0/z, z0.b, z0.d
+cmphi p0.b, p0/z, z1.b, z0.b
+cmphi p0.d, p0/z, z0.d, #0
+cmphi p0.d, p0/z, z0.d, #127
+cmphi p0.d, p0/z, z0.d, z0.d
+cmphi p0.d, p0/z, z1.d, z0.d
+cmphi p0.h, p0/z, z0.h, #0
+cmphi p0.h, p0/z, z0.h, #127
+cmphi p0.h, p0/z, z0.h, z0.d
+cmphi p0.h, p0/z, z0.h, z0.h
+cmphi p0.h, p0/z, z1.h, z0.h
+cmphi p0.s, p0/z, z0.s, #0
+cmphi p0.s, p0/z, z0.s, #127
+cmphi p0.s, p0/z, z0.s, z0.d
+cmphi p0.s, p0/z, z0.s, z0.s
+cmphi p0.s, p0/z, z1.s, z0.s
+cmphs p0.b, p0/z, z0.b, #0
+cmphs p0.b, p0/z, z0.b, #127
+cmphs p0.b, p0/z, z0.b, z0.b
+cmphs p0.b, p0/z, z0.b, z0.d
+cmphs p0.b, p0/z, z1.b, z0.b
+cmphs p0.d, p0/z, z0.d, #0
+cmphs p0.d, p0/z, z0.d, #127
+cmphs p0.d, p0/z, z0.d, z0.d
+cmphs p0.d, p0/z, z1.d, z0.d
+cmphs p0.h, p0/z, z0.h, #0
+cmphs p0.h, p0/z, z0.h, #127
+cmphs p0.h, p0/z, z0.h, z0.d
+cmphs p0.h, p0/z, z0.h, z0.h
+cmphs p0.h, p0/z, z1.h, z0.h
+cmphs p0.s, p0/z, z0.s, #0
+cmphs p0.s, p0/z, z0.s, #127
+cmphs p0.s, p0/z, z0.s, z0.d
+cmphs p0.s, p0/z, z0.s, z0.s
+cmphs p0.s, p0/z, z1.s, z0.s
+cmple p0.b, p0/z, z0.b, #-16
+cmple p0.b, p0/z, z0.b, #15
+cmple p0.b, p0/z, z0.b, z0.d
+cmple p0.d, p0/z, z0.d, #-16
+cmple p0.d, p0/z, z0.d, #15
+cmple p0.h, p0/z, z0.h, #-16
+cmple p0.h, p0/z, z0.h, #15
+cmple p0.h, p0/z, z0.h, z0.d
+cmple p0.s, p0/z, z0.s, #-16
+cmple p0.s, p0/z, z0.s, #15
+cmple p0.s, p0/z, z0.s, z0.d
+cmplo p0.b, p0/z, z0.b, #0
+cmplo p0.b, p0/z, z0.b, #127
+cmplo p0.b, p0/z, z0.b, z0.d
+cmplo p0.d, p0/z, z0.d, #0
+cmplo p0.d, p0/z, z0.d, #127
+cmplo p0.h, p0/z, z0.h, #0
+cmplo p0.h, p0/z, z0.h, #127
+cmplo p0.h, p0/z, z0.h, z0.d
+cmplo p0.s, p0/z, z0.s, #0
+cmplo p0.s, p0/z, z0.s, #127
+cmplo p0.s, p0/z, z0.s, z0.d
+cmpls p0.b, p0/z, z0.b, #0
+cmpls p0.b, p0/z, z0.b, #127
+cmpls p0.b, p0/z, z0.b, z0.d
+cmpls p0.d, p0/z, z0.d, #0
+cmpls p0.d, p0/z, z0.d, #127
+cmpls p0.h, p0/z, z0.h, #0
+cmpls p0.h, p0/z, z0.h, #127
+cmpls p0.h, p0/z, z0.h, z0.d
+cmpls p0.s, p0/z, z0.s, #0
+cmpls p0.s, p0/z, z0.s, #127
+cmpls p0.s, p0/z, z0.s, z0.d
+cmplt p0.b, p0/z, z0.b, #-16
+cmplt p0.b, p0/z, z0.b, #15
+cmplt p0.b, p0/z, z0.b, z0.d
+cmplt p0.d, p0/z, z0.d, #-16
+cmplt p0.d, p0/z, z0.d, #15
+cmplt p0.h, p0/z, z0.h, #-16
+cmplt p0.h, p0/z, z0.h, #15
+cmplt p0.h, p0/z, z0.h, z0.d
+cmplt p0.s, p0/z, z0.s, #-16
+cmplt p0.s, p0/z, z0.s, #15
+cmplt p0.s, p0/z, z0.s, z0.d
+cmpne p0.b, p0/z, z0.b, #-16
+cmpne p0.b, p0/z, z0.b, #15
+cmpne p0.b, p0/z, z0.b, z0.b
+cmpne p0.b, p0/z, z0.b, z0.d
+cmpne p0.d, p0/z, z0.d, #-16
+cmpne p0.d, p0/z, z0.d, #15
+cmpne p0.d, p0/z, z0.d, z0.d
+cmpne p0.h, p0/z, z0.h, #-16
+cmpne p0.h, p0/z, z0.h, #15
+cmpne p0.h, p0/z, z0.h, z0.d
+cmpne p0.h, p0/z, z0.h, z0.h
+cmpne p0.s, p0/z, z0.s, #-16
+cmpne p0.s, p0/z, z0.s, #15
+cmpne p0.s, p0/z, z0.s, z0.d
+cmpne p0.s, p0/z, z0.s, z0.s
+cnot z31.b, p7/m, z31.b
+cnot z31.d, p7/m, z31.d
+cnot z31.h, p7/m, z31.h
+cnot z31.s, p7/m, z31.s
+cnt z31.b, p7/m, z31.b
+cnt z31.d, p7/m, z31.d
+cnt z31.h, p7/m, z31.h
+cnt z31.s, p7/m, z31.s
+cntb x0
+cntb x0, #28
+cntb x0, all, mul #16
+cntb x0, pow2
+cntd x0
+cntd x0, #28
+cntd x0, all, mul #16
+cntd x0, pow2
+cnth x0
+cnth x0, #28
+cnth x0, all, mul #16
+cnth x0, pow2
+cntp x0, p15, p0.b
+cntp x0, p15, p0.d
+cntp x0, p15, p0.h
+cntp x0, p15, p0.s
+cntw x0
+cntw x0, #28
+cntw x0, all, mul #16
+cntw x0, pow2
+compact z31.d, p7, z31.d
+compact z31.s, p7, z31.s
+ctermeq w30, wzr
+ctermeq wzr, w30
+ctermeq x30, xzr
+ctermeq xzr, x30
+ctermne w30, wzr
+ctermne wzr, w30
+ctermne x30, xzr
+ctermne xzr, x30
+decb x0
+decb x0, #14
+decb x0, all, mul #16
+decb x0, pow2
+decb x0, vl1
+decd x0
+decd x0, #14
+decd x0, all, mul #16
+decd x0, pow2
+decd x0, vl1
+dech x0
+dech x0, #14
+dech x0, all, mul #16
+dech x0, pow2
+dech x0, vl1
+decp x0, p0.b
+decp x0, p0.d
+decp x0, p0.h
+decp x0, p0.s
+decp xzr, p15.b
+decp xzr, p15.d
+decp xzr, p15.h
+decp xzr, p15.s
+decp z31.d, p15.d
+decp z31.h, p15.h
+decp z31.s, p15.s
+decw x0
+decw x0, #14
+decw x0, all, mul #16
+decw x0, pow2
+decw x0, vl1
+dupm z0.d, #0xfffffffffffffff9
+dupm z0.s, #0xfffffff9
+dupm z23.h, #0xfff9
+dupm z5.b, #0xf9
+eor p0.b, p0/z, p0.b, p1.b
+eor z0.d, z0.d, #0x6
+eor z0.d, z0.d, #0xfffffffffffffff9
+eor z0.d, z0.d, z0.d
+eor z0.s, z0.s, #0x6
+eor z0.s, z0.s, #0xfffffff9
+eor z23.d, z13.d, z8.d
+eor z23.h, z23.h, #0x6
+eor z23.h, z23.h, #0xfff9
+eor z31.b, p7/m, z31.b, z31.b
+eor z31.d, p7/m, z31.d, z31.d
+eor z31.h, p7/m, z31.h, z31.h
+eor z31.s, p7/m, z31.s, z31.s
+eor z5.b, z5.b, #0x6
+eor z5.b, z5.b, #0xf9
+eor3 z29.d, z29.d, z30.d, z31.d
+eorbt z0.b, z1.b, z31.b
+eorbt z0.d, z1.d, z31.d
+eorbt z0.h, z1.h, z31.h
+eorbt z0.s, z1.s, z31.s
+eors p0.b, p0/z, p0.b, p1.b
+eortb z0.b, z1.b, z31.b
+eortb z0.d, z1.d, z31.d
+eortb z0.h, z1.h, z31.h
+eortb z0.s, z1.s, z31.s
+eorv b0, p7, z31.b
+eorv d0, p7, z31.d
+eorv h0, p7, z31.h
+eorv s0, p7, z31.s
+ext z0.b, { z1.b, z2.b }, #0
+ext z31.b, z31.b, z0.b, #0
+ext z31.b, z31.b, z0.b, #255
+ext z31.b, { z30.b, z31.b }, #255
+fabd z0.d, p7/m, z0.d, z31.d
+fabd z0.h, p7/m, z0.h, z31.h
+fabd z0.s, p7/m, z0.s, z31.s
+fabs z31.d, p7/m, z31.d
+fabs z31.h, p7/m, z31.h
+fabs z31.s, p7/m, z31.s
+facge p0.d, p0/z, z0.d, z1.d
+facge p0.d, p0/z, z1.d, z0.d
+facge p0.h, p0/z, z0.h, z1.h
+facge p0.h, p0/z, z1.h, z0.h
+facge p0.s, p0/z, z0.s, z1.s
+facge p0.s, p0/z, z1.s, z0.s
+facgt p0.d, p0/z, z0.d, z1.d
+facgt p0.d, p0/z, z1.d, z0.d
+facgt p0.h, p0/z, z0.h, z1.h
+facgt p0.h, p0/z, z1.h, z0.h
+facgt p0.s, p0/z, z0.s, z1.s
+facgt p0.s, p0/z, z1.s, z0.s
+fadd z0.d, p0/m, z0.d, #0.5
+fadd z0.d, p7/m, z0.d, z31.d
+fadd z0.d, z1.d, z31.d
+fadd z0.h, p0/m, z0.h, #0.5
+fadd z0.h, p7/m, z0.h, z31.h
+fadd z0.h, z1.h, z31.h
+fadd z0.s, p0/m, z0.s, #0.5
+fadd z0.s, p7/m, z0.s, z31.s
+fadd z0.s, z1.s, z31.s
+fadd z31.d, p7/m, z31.d, #1.0
+fadd z31.h, p7/m, z31.h, #1.0
+fadd z31.s, p7/m, z31.s, #1.0
+fadda d0, p7, d0, z31.d
+fadda h0, p7, h0, z31.h
+fadda s0, p7, s0, z31.s
+faddp z0.h, p0/m, z0.h, z1.h
+faddp z29.s, p3/m, z29.s, z30.s
+faddp z31.d, p7/m, z31.d, z30.d
+faddv d0, p7, z31.d
+faddv h0, p7, z31.h
+faddv s0, p7, z31.s
+fcadd z0.d, p0/m, z0.d, z0.d, #90
+fcadd z0.h, p0/m, z0.h, z0.h, #90
+fcadd z0.s, p0/m, z0.s, z0.s, #90
+fcadd z31.d, p7/m, z31.d, z31.d, #270
+fcadd z31.h, p7/m, z31.h, z31.h, #270
+fcadd z31.s, p7/m, z31.s, z31.s, #270
+fcmeq p0.d, p0/z, z0.d, #0.0
+fcmeq p0.d, p0/z, z0.d, z1.d
+fcmeq p0.h, p0/z, z0.h, #0.0
+fcmeq p0.h, p0/z, z0.h, z1.h
+fcmeq p0.s, p0/z, z0.s, #0.0
+fcmeq p0.s, p0/z, z0.s, z1.s
+fcmge p0.d, p0/z, z0.d, #0.0
+fcmge p0.d, p0/z, z0.d, z1.d
+fcmge p0.d, p0/z, z1.d, z0.d
+fcmge p0.h, p0/z, z0.h, #0.0
+fcmge p0.h, p0/z, z0.h, z1.h
+fcmge p0.h, p0/z, z1.h, z0.h
+fcmge p0.s, p0/z, z0.s, #0.0
+fcmge p0.s, p0/z, z0.s, z1.s
+fcmge p0.s, p0/z, z1.s, z0.s
+fcmgt p0.d, p0/z, z0.d, #0.0
+fcmgt p0.d, p0/z, z0.d, z1.d
+fcmgt p0.d, p0/z, z1.d, z0.d
+fcmgt p0.h, p0/z, z0.h, #0.0
+fcmgt p0.h, p0/z, z0.h, z1.h
+fcmgt p0.h, p0/z, z1.h, z0.h
+fcmgt p0.s, p0/z, z0.s, #0.0
+fcmgt p0.s, p0/z, z0.s, z1.s
+fcmgt p0.s, p0/z, z1.s, z0.s
+fcmla z0.d, p0/m, z0.d, z0.d, #0
+fcmla z0.d, p0/m, z1.d, z2.d, #90
+fcmla z0.h, p0/m, z0.h, z0.h, #0
+fcmla z0.h, p0/m, z1.h, z2.h, #90
+fcmla z0.h, z0.h, z0.h[0], #0
+fcmla z0.s, p0/m, z0.s, z0.s, #0
+fcmla z0.s, p0/m, z1.s, z2.s, #90
+fcmla z21.s, z10.s, z5.s[1], #90
+fcmla z23.s, z13.s, z8.s[0], #270
+fcmla z29.d, p7/m, z30.d, z31.d, #180
+fcmla z29.h, p7/m, z30.h, z31.h, #180
+fcmla z29.s, p7/m, z30.s, z31.s, #180
+fcmla z31.d, p7/m, z31.d, z31.d, #270
+fcmla z31.h, p7/m, z31.h, z31.h, #270
+fcmla z31.h, z31.h, z7.h[3], #270
+fcmla z31.s, p7/m, z31.s, z31.s, #270
+fcmle p0.d, p0/z, z0.d, #0.0
+fcmle p0.h, p0/z, z0.h, #0.0
+fcmle p0.s, p0/z, z0.s, #0.0
+fcmlt p0.d, p0/z, z0.d, #0.0
+fcmlt p0.h, p0/z, z0.h, #0.0
+fcmlt p0.s, p0/z, z0.s, #0.0
+fcmne p0.d, p0/z, z0.d, #0.0
+fcmne p0.d, p0/z, z0.d, z1.d
+fcmne p0.h, p0/z, z0.h, #0.0
+fcmne p0.h, p0/z, z0.h, z1.h
+fcmne p0.s, p0/z, z0.s, #0.0
+fcmne p0.s, p0/z, z0.s, z1.s
+fcmuo p0.d, p0/z, z0.d, z1.d
+fcmuo p0.h, p0/z, z0.h, z1.h
+fcmuo p0.s, p0/z, z0.s, z1.s
+fcvt z0.d, p0/m, z0.h
+fcvt z0.d, p0/m, z0.s
+fcvt z0.h, p0/m, z0.d
+fcvt z0.h, p0/m, z0.s
+fcvt z0.s, p0/m, z0.d
+fcvt z0.s, p0/m, z0.h
+fcvtlt z0.s, p0/m, z1.h
+fcvtlt z30.d, p7/m, z31.s
+fcvtnt z0.h, p0/m, z1.s
+fcvtnt z30.s, p7/m, z31.d
+fcvtx z0.s, p0/m, z0.d
+fcvtx z30.s, p7/m, z31.d
+fcvtxnt z0.s, p0/m, z1.d
+fcvtxnt z30.s, p7/m, z31.d
+fcvtzs z0.d, p0/m, z0.d
+fcvtzs z0.d, p0/m, z0.h
+fcvtzs z0.d, p0/m, z0.s
+fcvtzs z0.h, p0/m, z0.h
+fcvtzs z0.s, p0/m, z0.d
+fcvtzs z0.s, p0/m, z0.h
+fcvtzs z0.s, p0/m, z0.s
+fcvtzu z0.d, p0/m, z0.d
+fcvtzu z0.d, p0/m, z0.h
+fcvtzu z0.d, p0/m, z0.s
+fcvtzu z0.h, p0/m, z0.h
+fcvtzu z0.s, p0/m, z0.d
+fcvtzu z0.s, p0/m, z0.h
+fcvtzu z0.s, p0/m, z0.s
+fdiv z0.d, p7/m, z0.d, z31.d
+fdiv z0.h, p7/m, z0.h, z31.h
+fdiv z0.s, p7/m, z0.s, z31.s
+fdivr z0.d, p7/m, z0.d, z31.d
+fdivr z0.h, p7/m, z0.h, z31.h
+fdivr z0.s, p7/m, z0.s, z31.s
+fexpa z0.d, z31.d
+fexpa z0.h, z31.h
+fexpa z0.s, z31.s
+flogb z31.d, p7/m, z31.d
+flogb z31.h, p7/m, z31.h
+flogb z31.s, p7/m, z31.s
+fmad z0.d, p7/m, z1.d, z31.d
+fmad z0.h, p7/m, z1.h, z31.h
+fmad z0.s, p7/m, z1.s, z31.s
+fmax z0.d, p0/m, z0.d, #0.0
+fmax z0.d, p7/m, z0.d, z31.d
+fmax z0.h, p0/m, z0.h, #0.0
+fmax z0.h, p7/m, z0.h, z31.h
+fmax z0.s, p0/m, z0.s, #0.0
+fmax z0.s, p7/m, z0.s, z31.s
+fmax z31.d, p7/m, z31.d, #1.0
+fmax z31.h, p7/m, z31.h, #1.0
+fmax z31.s, p7/m, z31.s, #1.0
+fmaxnm z0.d, p0/m, z0.d, #0.0
+fmaxnm z0.d, p7/m, z0.d, z31.d
+fmaxnm z0.h, p0/m, z0.h, #0.0
+fmaxnm z0.h, p7/m, z0.h, z31.h
+fmaxnm z0.s, p0/m, z0.s, #0.0
+fmaxnm z0.s, p7/m, z0.s, z31.s
+fmaxnm z31.d, p7/m, z31.d, #1.0
+fmaxnm z31.h, p7/m, z31.h, #1.0
+fmaxnm z31.s, p7/m, z31.s, #1.0
+fmaxnmp z0.h, p0/m, z0.h, z1.h
+fmaxnmp z29.s, p3/m, z29.s, z30.s
+fmaxnmp z31.d, p7/m, z31.d, z30.d
+fmaxnmv d0, p7, z31.d
+fmaxnmv h0, p7, z31.h
+fmaxnmv s0, p7, z31.s
+fmaxp z0.h, p0/m, z0.h, z1.h
+fmaxp z29.s, p3/m, z29.s, z30.s
+fmaxp z31.d, p7/m, z31.d, z30.d
+fmaxv d0, p7, z31.d
+fmaxv h0, p7, z31.h
+fmaxv s0, p7, z31.s
+fmin z0.d, p0/m, z0.d, #0.0
+fmin z0.d, p7/m, z0.d, z31.d
+fmin z0.h, p0/m, z0.h, #0.0
+fmin z0.h, p7/m, z0.h, z31.h
+fmin z0.s, p0/m, z0.s, #0.0
+fmin z0.s, p7/m, z0.s, z31.s
+fmin z31.d, p7/m, z31.d, #1.0
+fmin z31.h, p7/m, z31.h, #1.0
+fmin z31.s, p7/m, z31.s, #1.0
+fminnm z0.d, p0/m, z0.d, #0.0
+fminnm z0.d, p7/m, z0.d, z31.d
+fminnm z0.h, p0/m, z0.h, #0.0
+fminnm z0.h, p7/m, z0.h, z31.h
+fminnm z0.s, p0/m, z0.s, #0.0
+fminnm z0.s, p7/m, z0.s, z31.s
+fminnm z31.d, p7/m, z31.d, #1.0
+fminnm z31.h, p7/m, z31.h, #1.0
+fminnm z31.s, p7/m, z31.s, #1.0
+fminnmp z0.h, p0/m, z0.h, z1.h
+fminnmp z29.s, p3/m, z29.s, z30.s
+fminnmp z31.d, p7/m, z31.d, z30.d
+fminnmv d0, p7, z31.d
+fminnmv h0, p7, z31.h
+fminnmv s0, p7, z31.s
+fminp z0.h, p0/m, z0.h, z1.h
+fminp z29.s, p3/m, z29.s, z30.s
+fminp z31.d, p7/m, z31.d, z30.d
+fminv d0, p7, z31.d
+fminv h0, p7, z31.h
+fminv s0, p7, z31.s
+fmla z0.d, p7/m, z1.d, z31.d
+fmla z0.d, z1.d, z7.d[1]
+fmla z0.h, p7/m, z1.h, z31.h
+fmla z0.h, z1.h, z7.h[7]
+fmla z0.s, p7/m, z1.s, z31.s
+fmla z0.s, z1.s, z7.s[3]
+fmlalb z0.s, z1.h, z7.h[0]
+fmlalb z29.s, z30.h, z31.h
+fmlalb z30.s, z31.h, z7.h[7]
+fmlalt z0.s, z1.h, z7.h[0]
+fmlalt z29.s, z30.h, z31.h
+fmlalt z30.s, z31.h, z7.h[7]
+fmls z0.d, p7/m, z1.d, z31.d
+fmls z0.d, z1.d, z7.d[1]
+fmls z0.h, p7/m, z1.h, z31.h
+fmls z0.h, z1.h, z7.h[7]
+fmls z0.s, p7/m, z1.s, z31.s
+fmls z0.s, z1.s, z7.s[3]
+fmlslb z0.s, z1.h, z7.h[0]
+fmlslb z29.s, z30.h, z31.h
+fmlslb z30.s, z31.h, z7.h[7]
+fmlslt z0.s, z1.h, z7.h[0]
+fmlslt z29.s, z30.h, z31.h
+fmlslt z30.s, z31.h, z7.h[7]
+fmov z0.d, #-10.00000000
+fmov z0.d, #0.12500000
+fmov z0.d, p0/m, #-10.00000000
+fmov z0.d, p0/m, #0.12500000
+fmov z0.h, #-0.12500000
+fmov z0.h, p0/m, #-0.12500000
+fmov z0.s, #-0.12500000
+fmov z0.s, p0/m, #-0.12500000
+fmsb z0.d, p7/m, z1.d, z31.d
+fmsb z0.h, p7/m, z1.h, z31.h
+fmsb z0.s, p7/m, z1.s, z31.s
+fmul z0.d, p0/m, z0.d, #0.5
+fmul z0.d, p7/m, z0.d, z31.d
+fmul z0.d, z0.d, z0.d[0]
+fmul z0.d, z1.d, z31.d
+fmul z0.h, p0/m, z0.h, #0.5
+fmul z0.h, p7/m, z0.h, z31.h
+fmul z0.h, z0.h, z0.h[0]
+fmul z0.h, z1.h, z31.h
+fmul z0.s, p0/m, z0.s, #0.5
+fmul z0.s, p7/m, z0.s, z31.s
+fmul z0.s, z0.s, z0.s[0]
+fmul z0.s, z1.s, z31.s
+fmul z31.d, p7/m, z31.d, #2.0
+fmul z31.d, z31.d, z15.d[1]
+fmul z31.h, p7/m, z31.h, #2.0
+fmul z31.h, z31.h, z7.h[7]
+fmul z31.s, p7/m, z31.s, #2.0
+fmul z31.s, z31.s, z7.s[3]
+fmulx z0.d, p7/m, z0.d, z31.d
+fmulx z0.h, p7/m, z0.h, z31.h
+fmulx z0.s, p7/m, z0.s, z31.s
+fneg z31.d, p7/m, z31.d
+fneg z31.h, p7/m, z31.h
+fneg z31.s, p7/m, z31.s
+fnmad z0.d, p7/m, z1.d, z31.d
+fnmad z0.h, p7/m, z1.h, z31.h
+fnmad z0.s, p7/m, z1.s, z31.s
+fnmla z0.d, p7/m, z1.d, z31.d
+fnmla z0.h, p7/m, z1.h, z31.h
+fnmla z0.s, p7/m, z1.s, z31.s
+fnmls z0.d, p7/m, z1.d, z31.d
+fnmls z0.h, p7/m, z1.h, z31.h
+fnmls z0.s, p7/m, z1.s, z31.s
+fnmsb z0.d, p7/m, z1.d, z31.d
+fnmsb z0.h, p7/m, z1.h, z31.h
+fnmsb z0.s, p7/m, z1.s, z31.s
+frecpe z0.d, z31.d
+frecpe z0.h, z31.h
+frecpe z0.s, z31.s
+frecps z0.d, z1.d, z31.d
+frecps z0.h, z1.h, z31.h
+frecps z0.s, z1.s, z31.s
+frecpx z31.d, p7/m, z31.d
+frecpx z31.h, p7/m, z31.h
+frecpx z31.s, p7/m, z31.s
+frinta z31.d, p7/m, z31.d
+frinta z31.h, p7/m, z31.h
+frinta z31.s, p7/m, z31.s
+frinti z31.d, p7/m, z31.d
+frinti z31.h, p7/m, z31.h
+frinti z31.s, p7/m, z31.s
+frintm z31.d, p7/m, z31.d
+frintm z31.h, p7/m, z31.h
+frintm z31.s, p7/m, z31.s
+frintn z31.d, p7/m, z31.d
+frintn z31.h, p7/m, z31.h
+frintn z31.s, p7/m, z31.s
+frintp z31.d, p7/m, z31.d
+frintp z31.h, p7/m, z31.h
+frintp z31.s, p7/m, z31.s
+frintx z31.d, p7/m, z31.d
+frintx z31.h, p7/m, z31.h
+frintx z31.s, p7/m, z31.s
+frintz z31.d, p7/m, z31.d
+frintz z31.h, p7/m, z31.h
+frintz z31.s, p7/m, z31.s
+frsqrte z0.d, z31.d
+frsqrte z0.h, z31.h
+frsqrte z0.s, z31.s
+frsqrts z0.d, z1.d, z31.d
+frsqrts z0.h, z1.h, z31.h
+frsqrts z0.s, z1.s, z31.s
+fscale z0.d, p7/m, z0.d, z31.d
+fscale z0.h, p7/m, z0.h, z31.h
+fscale z0.s, p7/m, z0.s, z31.s
+fsqrt z31.d, p7/m, z31.d
+fsqrt z31.h, p7/m, z31.h
+fsqrt z31.s, p7/m, z31.s
+fsub z0.d, p0/m, z0.d, #0.5
+fsub z0.d, p7/m, z0.d, z31.d
+fsub z0.d, z1.d, z31.d
+fsub z0.h, p0/m, z0.h, #0.5
+fsub z0.h, p7/m, z0.h, z31.h
+fsub z0.h, z1.h, z31.h
+fsub z0.s, p0/m, z0.s, #0.5
+fsub z0.s, p7/m, z0.s, z31.s
+fsub z0.s, z1.s, z31.s
+fsub z31.d, p7/m, z31.d, #1.0
+fsub z31.h, p7/m, z31.h, #1.0
+fsub z31.s, p7/m, z31.s, #1.0
+fsubr z0.d, p0/m, z0.d, #0.5
+fsubr z0.d, p7/m, z0.d, z31.d
+fsubr z0.h, p0/m, z0.h, #0.5
+fsubr z0.h, p7/m, z0.h, z31.h
+fsubr z0.s, p0/m, z0.s, #0.5
+fsubr z0.s, p7/m, z0.s, z31.s
+fsubr z31.d, p7/m, z31.d, #1.0
+fsubr z31.h, p7/m, z31.h, #1.0
+fsubr z31.s, p7/m, z31.s, #1.0
+ftmad z0.d, z0.d, z31.d, #7
+ftmad z0.h, z0.h, z31.h, #7
+ftmad z0.s, z0.s, z31.s, #7
+ftsmul z0.d, z1.d, z31.d
+ftsmul z0.h, z1.h, z31.h
+ftsmul z0.s, z1.s, z31.s
+ftssel z0.d, z1.d, z31.d
+ftssel z0.h, z1.h, z31.h
+ftssel z0.s, z1.s, z31.s
+histcnt z0.s, p0/z, z1.s, z2.s
+histcnt z29.d, p7/z, z30.d, z31.d
+histseg z0.b, z1.b, z31.b
+incb x0
+incb x0, #14
+incb x0, all, mul #16
+incb x0, pow2
+incb x0, vl1
+incd x0
+incd x0, #14
+incd x0, all, mul #16
+incd x0, pow2
+incd x0, vl1
+incd z0.d
+incd z0.d, all, mul #16
+inch x0
+inch x0, #14
+inch x0, all, mul #16
+inch x0, pow2
+inch x0, vl1
+inch z0.h
+inch z0.h, all, mul #16
+incp x0, p0.b
+incp x0, p0.d
+incp x0, p0.h
+incp x0, p0.s
+incp xzr, p15.b
+incp xzr, p15.d
+incp xzr, p15.h
+incp xzr, p15.s
+incp z31.d, p15.d
+incp z31.h, p15.h
+incp z31.s, p15.s
+incw x0
+incw x0, #14
+incw x0, all, mul #16
+incw x0, pow2
+incw x0, vl1
+incw z0.s
+incw z0.s, all, mul #16
+index z0.b, #0, #0
+index z0.d, #0, #0
+index z0.h, #0, #0
+index z0.h, w0, w0
+index z0.s, #0, #0
+index z21.b, w10, w21
+index z21.d, x10, x21
+index z21.s, w10, w21
+index z23.b, #13, w8
+index z23.b, w13, #8
+index z23.d, #13, x8
+index z23.d, x13, #8
+index z23.h, #13, w8
+index z23.h, w13, #8
+index z23.s, #13, w8
+index z23.s, w13, #8
+index z31.b, #-1, #-1
+index z31.b, #-1, wzr
+index z31.b, wzr, #-1
+index z31.b, wzr, wzr
+index z31.d, #-1, #-1
+index z31.d, #-1, xzr
+index z31.d, xzr, #-1
+index z31.d, xzr, xzr
+index z31.h, #-1, #-1
+index z31.h, #-1, wzr
+index z31.h, wzr, #-1
+index z31.h, wzr, wzr
+index z31.s, #-1, #-1
+index z31.s, #-1, wzr
+index z31.s, wzr, #-1
+index z31.s, wzr, wzr
+insr z0.b, w0
+insr z0.d, x0
+insr z0.h, w0
+insr z0.s, w0
+insr z31.b, b31
+insr z31.b, wzr
+insr z31.d, d31
+insr z31.d, xzr
+insr z31.h, h31
+insr z31.h, wzr
+insr z31.s, s31
+insr z31.s, wzr
+lasta b0, p7, z31.b
+lasta d0, p7, z31.d
+lasta h0, p7, z31.h
+lasta s0, p7, z31.s
+lasta w0, p7, z31.b
+lasta w0, p7, z31.h
+lasta w0, p7, z31.s
+lasta x0, p7, z31.d
+lastb b0, p7, z31.b
+lastb d0, p7, z31.d
+lastb h0, p7, z31.h
+lastb s0, p7, z31.s
+lastb w0, p7, z31.b
+lastb w0, p7, z31.h
+lastb w0, p7, z31.s
+lastb x0, p7, z31.d
+ld1b { z0.b }, p0/z, [sp, x0]
+ld1b { z0.b }, p0/z, [x0, x0]
+ld1b { z0.b }, p0/z, [x0]
+ld1b { z0.d }, p0/z, [x0]
+ld1b { z0.d }, p0/z, [z0.d]
+ld1b { z0.h }, p0/z, [x0]
+ld1b { z0.s }, p0/z, [x0, z0.s, sxtw]
+ld1b { z0.s }, p0/z, [x0, z0.s, uxtw]
+ld1b { z0.s }, p0/z, [x0]
+ld1b { z0.s }, p0/z, [z0.s]
+ld1b { z21.b }, p5/z, [x10, #5, mul vl]
+ld1b { z21.d }, p5/z, [x10, #5, mul vl]
+ld1b { z21.d }, p5/z, [x10, z21.d, sxtw]
+ld1b { z21.d }, p5/z, [x10, z21.d, uxtw]
+ld1b { z21.h }, p5/z, [x10, #5, mul vl]
+ld1b { z21.s }, p5/z, [x10, #5, mul vl]
+ld1b { z21.s }, p5/z, [x10, x21]
+ld1b { z23.d }, p3/z, [x13, x8]
+ld1b { z31.b }, p7/z, [sp, #-1, mul vl]
+ld1b { z31.d }, p7/z, [sp, #-1, mul vl]
+ld1b { z31.d }, p7/z, [sp, z31.d]
+ld1b { z31.d }, p7/z, [z31.d, #31]
+ld1b { z31.h }, p7/z, [sp, #-1, mul vl]
+ld1b { z31.s }, p7/z, [sp, #-1, mul vl]
+ld1b { z31.s }, p7/z, [z31.s, #31]
+ld1b { z5.h }, p3/z, [x17, x16]
+ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
+ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
+ld1d { z0.d }, p0/z, [x0]
+ld1d { z0.d }, p0/z, [z0.d]
+ld1d { z21.d }, p5/z, [x10, #5, mul vl]
+ld1d { z21.d }, p5/z, [x10, z21.d, sxtw]
+ld1d { z21.d }, p5/z, [x10, z21.d, uxtw]
+ld1d { z23.d }, p3/z, [sp, x8, lsl #3]
+ld1d { z23.d }, p3/z, [x13, x8, lsl #3]
+ld1d { z23.d }, p3/z, [x13, z8.d, lsl #3]
+ld1d { z31.d }, p7/z, [sp, #-1, mul vl]
+ld1d { z31.d }, p7/z, [sp, z31.d]
+ld1d { z31.d }, p7/z, [z31.d, #248]
+ld1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
+ld1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
+ld1h { z0.d }, p0/z, [x0]
+ld1h { z0.d }, p0/z, [z0.d]
+ld1h { z0.h }, p0/z, [x0]
+ld1h { z0.s }, p0/z, [x0, z0.s, sxtw]
+ld1h { z0.s }, p0/z, [x0, z0.s, uxtw]
+ld1h { z0.s }, p0/z, [x0]
+ld1h { z0.s }, p0/z, [z0.s]
+ld1h { z21.d }, p5/z, [x10, #5, mul vl]
+ld1h { z21.d }, p5/z, [x10, z21.d, sxtw]
+ld1h { z21.d }, p5/z, [x10, z21.d, uxtw]
+ld1h { z21.h }, p5/z, [x10, #5, mul vl]
+ld1h { z21.s }, p5/z, [x10, #5, mul vl]
+ld1h { z21.s }, p5/z, [x10, x21, lsl #1]
+ld1h { z23.d }, p3/z, [x13, x8, lsl #1]
+ld1h { z23.d }, p3/z, [x13, z8.d, lsl #1]
+ld1h { z31.d }, p7/z, [sp, #-1, mul vl]
+ld1h { z31.d }, p7/z, [sp, z31.d]
+ld1h { z31.d }, p7/z, [z31.d, #62]
+ld1h { z31.h }, p7/z, [sp, #-1, mul vl]
+ld1h { z31.s }, p7/z, [sp, #-1, mul vl]
+ld1h { z31.s }, p7/z, [sp, z31.s, sxtw #1]
+ld1h { z31.s }, p7/z, [sp, z31.s, uxtw #1]
+ld1h { z31.s }, p7/z, [z31.s, #62]
+ld1h { z5.h }, p3/z, [sp, x16, lsl #1]
+ld1h { z5.h }, p3/z, [x17, x16, lsl #1]
+ld1rb { z0.b }, p0/z, [x0]
+ld1rb { z0.d }, p0/z, [x0]
+ld1rb { z0.h }, p0/z, [x0]
+ld1rb { z0.s }, p0/z, [x0]
+ld1rb { z31.b }, p7/z, [sp, #63]
+ld1rb { z31.d }, p7/z, [sp, #63]
+ld1rb { z31.h }, p7/z, [sp, #63]
+ld1rb { z31.s }, p7/z, [sp, #63]
+ld1rd { z0.d }, p0/z, [x0]
+ld1rd { z31.d }, p7/z, [sp, #504]
+ld1rh { z0.d }, p0/z, [x0]
+ld1rh { z0.h }, p0/z, [x0]
+ld1rh { z0.s }, p0/z, [x0]
+ld1rh { z31.d }, p7/z, [sp, #126]
+ld1rh { z31.h }, p7/z, [sp, #126]
+ld1rh { z31.s }, p7/z, [sp, #126]
+ld1rqb { z0.b }, p0/z, [x0, x0]
+ld1rqb { z0.b }, p0/z, [x0]
+ld1rqb { z21.b }, p5/z, [x10, #112]
+ld1rqb { z23.b }, p3/z, [x13, #-128]
+ld1rqb { z31.b }, p7/z, [sp, #-16]
+ld1rqd { z0.d }, p0/z, [x0, x0, lsl #3]
+ld1rqd { z0.d }, p0/z, [x0]
+ld1rqd { z23.d }, p3/z, [x13, #-128]
+ld1rqd { z23.d }, p3/z, [x13, #112]
+ld1rqd { z31.d }, p7/z, [sp, #-16]
+ld1rqh { z0.h }, p0/z, [x0, x0, lsl #1]
+ld1rqh { z0.h }, p0/z, [x0]
+ld1rqh { z23.h }, p3/z, [x13, #-128]
+ld1rqh { z23.h }, p3/z, [x13, #112]
+ld1rqh { z31.h }, p7/z, [sp, #-16]
+ld1rqw { z0.s }, p0/z, [x0, x0, lsl #2]
+ld1rqw { z0.s }, p0/z, [x0]
+ld1rqw { z23.s }, p3/z, [x13, #-128]
+ld1rqw { z23.s }, p3/z, [x13, #112]
+ld1rqw { z31.s }, p7/z, [sp, #-16]
+ld1rsb { z0.d }, p0/z, [x0]
+ld1rsb { z0.h }, p0/z, [x0]
+ld1rsb { z0.s }, p0/z, [x0]
+ld1rsb { z31.d }, p7/z, [sp, #63]
+ld1rsb { z31.h }, p7/z, [sp, #63]
+ld1rsb { z31.s }, p7/z, [sp, #63]
+ld1rsh { z0.d }, p0/z, [x0]
+ld1rsh { z0.s }, p0/z, [x0]
+ld1rsh { z31.d }, p7/z, [sp, #126]
+ld1rsh { z31.s }, p7/z, [sp, #126]
+ld1rsw { z0.d }, p0/z, [x0]
+ld1rsw { z31.d }, p7/z, [sp, #252]
+ld1rw { z0.d }, p0/z, [x0]
+ld1rw { z0.s }, p0/z, [x0]
+ld1rw { z31.d }, p7/z, [sp, #252]
+ld1rw { z31.s }, p7/z, [sp, #252]
+ld1sb { z0.d }, p0/z, [x0]
+ld1sb { z0.d }, p0/z, [z0.d]
+ld1sb { z0.h }, p0/z, [sp, x0]
+ld1sb { z0.h }, p0/z, [x0, x0]
+ld1sb { z0.h }, p0/z, [x0]
+ld1sb { z0.s }, p0/z, [x0, z0.s, sxtw]
+ld1sb { z0.s }, p0/z, [x0]
+ld1sb { z0.s }, p0/z, [z0.s]
+ld1sb { z21.d }, p5/z, [x10, #5, mul vl]
+ld1sb { z21.d }, p5/z, [x10, z21.d, sxtw]
+ld1sb { z21.d }, p5/z, [x10, z21.d, uxtw]
+ld1sb { z21.h }, p5/z, [x10, #5, mul vl]
+ld1sb { z21.s }, p5/z, [x10, #5, mul vl]
+ld1sb { z21.s }, p5/z, [x10, x21]
+ld1sb { z23.d }, p3/z, [x13, x8]
+ld1sb { z31.d }, p7/z, [sp, #-1, mul vl]
+ld1sb { z31.d }, p7/z, [sp, z31.d]
+ld1sb { z31.d }, p7/z, [z31.d, #31]
+ld1sb { z31.h }, p7/z, [sp, #-1, mul vl]
+ld1sb { z31.s }, p7/z, [sp, #-1, mul vl]
+ld1sb { z31.s }, p7/z, [z31.s, #31]
+ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1]
+ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
+ld1sh { z0.d }, p0/z, [x0]
+ld1sh { z0.d }, p0/z, [z0.d]
+ld1sh { z0.s }, p0/z, [x0, z0.s, sxtw]
+ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
+ld1sh { z0.s }, p0/z, [x0]
+ld1sh { z0.s }, p0/z, [z0.s]
+ld1sh { z21.d }, p5/z, [x10, #5, mul vl]
+ld1sh { z21.d }, p5/z, [x10, z21.d, sxtw]
+ld1sh { z21.d }, p5/z, [x10, z21.d, uxtw]
+ld1sh { z21.s }, p5/z, [sp, x21, lsl #1]
+ld1sh { z21.s }, p5/z, [x10, #5, mul vl]
+ld1sh { z21.s }, p5/z, [x10, x21, lsl #1]
+ld1sh { z23.d }, p3/z, [x13, x8, lsl #1]
+ld1sh { z23.d }, p3/z, [x13, z8.d, lsl #1]
+ld1sh { z31.d }, p7/z, [sp, #-1, mul vl]
+ld1sh { z31.d }, p7/z, [sp, z31.d]
+ld1sh { z31.d }, p7/z, [z31.d, #62]
+ld1sh { z31.s }, p7/z, [sp, #-1, mul vl]
+ld1sh { z31.s }, p7/z, [sp, z31.s, sxtw #1]
+ld1sh { z31.s }, p7/z, [sp, z31.s, uxtw #1]
+ld1sh { z31.s }, p7/z, [z31.s, #62]
+ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2]
+ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2]
+ld1sw { z0.d }, p0/z, [x0]
+ld1sw { z0.d }, p0/z, [z0.d]
+ld1sw { z21.d }, p5/z, [x10, #5, mul vl]
+ld1sw { z21.d }, p5/z, [x10, z21.d, sxtw]
+ld1sw { z21.d }, p5/z, [x10, z21.d, uxtw]
+ld1sw { z23.d }, p3/z, [sp, x8, lsl #2]
+ld1sw { z23.d }, p3/z, [x13, x8, lsl #2]
+ld1sw { z23.d }, p3/z, [x13, z8.d, lsl #2]
+ld1sw { z31.d }, p7/z, [sp, #-1, mul vl]
+ld1sw { z31.d }, p7/z, [sp, z31.d]
+ld1sw { z31.d }, p7/z, [z31.d, #124]
+ld1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
+ld1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
+ld1w { z0.d }, p0/z, [x0]
+ld1w { z0.d }, p0/z, [z0.d]
+ld1w { z0.s }, p0/z, [x0, z0.s, sxtw]
+ld1w { z0.s }, p0/z, [x0, z0.s, uxtw]
+ld1w { z0.s }, p0/z, [x0]
+ld1w { z0.s }, p0/z, [z0.s]
+ld1w { z21.d }, p5/z, [x10, #5, mul vl]
+ld1w { z21.d }, p5/z, [x10, z21.d, sxtw]
+ld1w { z21.d }, p5/z, [x10, z21.d, uxtw]
+ld1w { z21.s }, p5/z, [sp, x21, lsl #2]
+ld1w { z21.s }, p5/z, [x10, #5, mul vl]
+ld1w { z21.s }, p5/z, [x10, x21, lsl #2]
+ld1w { z23.d }, p3/z, [x13, x8, lsl #2]
+ld1w { z23.d }, p3/z, [x13, z8.d, lsl #2]
+ld1w { z31.d }, p7/z, [sp, #-1, mul vl]
+ld1w { z31.d }, p7/z, [sp, z31.d]
+ld1w { z31.d }, p7/z, [z31.d, #124]
+ld1w { z31.s }, p7/z, [sp, #-1, mul vl]
+ld1w { z31.s }, p7/z, [sp, z31.s, sxtw #2]
+ld1w { z31.s }, p7/z, [sp, z31.s, uxtw #2]
+ld1w { z31.s }, p7/z, [z31.s, #124]
+ld2b { z0.b, z1.b }, p0/z, [x0, x0]
+ld2b { z0.b, z1.b }, p0/z, [x0]
+ld2b { z21.b, z22.b }, p5/z, [x10, #10, mul vl]
+ld2b { z23.b, z24.b }, p3/z, [x13, #-16, mul vl]
+ld2b { z5.b, z6.b }, p3/z, [x17, x16]
+ld2d { z0.d, z1.d }, p0/z, [x0, x0, lsl #3]
+ld2d { z0.d, z1.d }, p0/z, [x0]
+ld2d { z21.d, z22.d }, p5/z, [x10, #10, mul vl]
+ld2d { z23.d, z24.d }, p3/z, [x13, #-16, mul vl]
+ld2d { z5.d, z6.d }, p3/z, [x17, x16, lsl #3]
+ld2h { z0.h, z1.h }, p0/z, [x0, x0, lsl #1]
+ld2h { z0.h, z1.h }, p0/z, [x0]
+ld2h { z21.h, z22.h }, p5/z, [x10, #10, mul vl]
+ld2h { z23.h, z24.h }, p3/z, [x13, #-16, mul vl]
+ld2h { z5.h, z6.h }, p3/z, [x17, x16, lsl #1]
+ld2w { z0.s, z1.s }, p0/z, [x0, x0, lsl #2]
+ld2w { z0.s, z1.s }, p0/z, [x0]
+ld2w { z21.s, z22.s }, p5/z, [x10, #10, mul vl]
+ld2w { z23.s, z24.s }, p3/z, [x13, #-16, mul vl]
+ld2w { z5.s, z6.s }, p3/z, [x17, x16, lsl #2]
+ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x0]
+ld3b { z0.b, z1.b, z2.b }, p0/z, [x0]
+ld3b { z21.b, z22.b, z23.b }, p5/z, [x10, #15, mul vl]
+ld3b { z23.b, z24.b, z25.b }, p3/z, [x13, #-24, mul vl]
+ld3b { z5.b, z6.b, z7.b }, p3/z, [x17, x16]
+ld3d { z0.d, z1.d, z2.d }, p0/z, [x0, x0, lsl #3]
+ld3d { z0.d, z1.d, z2.d }, p0/z, [x0]
+ld3d { z21.d, z22.d, z23.d }, p5/z, [x10, #15, mul vl]
+ld3d { z23.d, z24.d, z25.d }, p3/z, [x13, #-24, mul vl]
+ld3d { z5.d, z6.d, z7.d }, p3/z, [x17, x16, lsl #3]
+ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, x0, lsl #1]
+ld3h { z0.h, z1.h, z2.h }, p0/z, [x0]
+ld3h { z21.h, z22.h, z23.h }, p5/z, [x10, #15, mul vl]
+ld3h { z23.h, z24.h, z25.h }, p3/z, [x13, #-24, mul vl]
+ld3h { z5.h, z6.h, z7.h }, p3/z, [x17, x16, lsl #1]
+ld3w { z0.s, z1.s, z2.s }, p0/z, [x0, x0, lsl #2]
+ld3w { z0.s, z1.s, z2.s }, p0/z, [x0]
+ld3w { z21.s, z22.s, z23.s }, p5/z, [x10, #15, mul vl]
+ld3w { z23.s, z24.s, z25.s }, p3/z, [x13, #-24, mul vl]
+ld3w { z5.s, z6.s, z7.s }, p3/z, [x17, x16, lsl #2]
+ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x0]
+ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0]
+ld4b { z21.b, z22.b, z23.b, z24.b }, p5/z, [x10, #20, mul vl]
+ld4b { z23.b, z24.b, z25.b, z26.b }, p3/z, [x13, #-32, mul vl]
+ld4b { z5.b, z6.b, z7.b, z8.b }, p3/z, [x17, x16]
+ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0, x0, lsl #3]
+ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0]
+ld4d { z21.d, z22.d, z23.d, z24.d }, p5/z, [x10, #20, mul vl]
+ld4d { z23.d, z24.d, z25.d, z26.d }, p3/z, [x13, #-32, mul vl]
+ld4d { z5.d, z6.d, z7.d, z8.d }, p3/z, [x17, x16, lsl #3]
+ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, x0, lsl #1]
+ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0]
+ld4h { z21.h, z22.h, z23.h, z24.h }, p5/z, [x10, #20, mul vl]
+ld4h { z23.h, z24.h, z25.h, z26.h }, p3/z, [x13, #-32, mul vl]
+ld4h { z5.h, z6.h, z7.h, z8.h }, p3/z, [x17, x16, lsl #1]
+ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0, x0, lsl #2]
+ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0]
+ld4w { z21.s, z22.s, z23.s, z24.s }, p5/z, [x10, #20, mul vl]
+ld4w { z23.s, z24.s, z25.s, z26.s }, p3/z, [x13, #-32, mul vl]
+ld4w { z5.s, z6.s, z7.s, z8.s }, p3/z, [x17, x16, lsl #2]
+ldff1b { z0.d }, p0/z, [x0, x0]
+ldff1b { z0.d }, p0/z, [z0.d]
+ldff1b { z0.h }, p0/z, [x0, x0]
+ldff1b { z0.s }, p0/z, [x0, x0]
+ldff1b { z0.s }, p0/z, [x0, z0.s, sxtw]
+ldff1b { z0.s }, p0/z, [x0, z0.s, uxtw]
+ldff1b { z0.s }, p0/z, [z0.s]
+ldff1b { z21.d }, p5/z, [x10, z21.d, sxtw]
+ldff1b { z21.d }, p5/z, [x10, z21.d, uxtw]
+ldff1b { z31.b }, p7/z, [sp]
+ldff1b { z31.d }, p7/z, [sp, z31.d]
+ldff1b { z31.d }, p7/z, [sp]
+ldff1b { z31.d }, p7/z, [z31.d, #31]
+ldff1b { z31.h }, p7/z, [sp]
+ldff1b { z31.s }, p7/z, [sp]
+ldff1b { z31.s }, p7/z, [z31.s, #31]
+ldff1d { z0.d }, p0/z, [x0, x0, lsl #3]
+ldff1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
+ldff1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
+ldff1d { z0.d }, p0/z, [z0.d]
+ldff1d { z21.d }, p5/z, [x10, z21.d, sxtw]
+ldff1d { z21.d }, p5/z, [x10, z21.d, uxtw]
+ldff1d { z23.d }, p3/z, [x13, z8.d, lsl #3]
+ldff1d { z31.d }, p7/z, [sp, z31.d]
+ldff1d { z31.d }, p7/z, [sp]
+ldff1d { z31.d }, p7/z, [z31.d, #248]
+ldff1h { z0.d }, p0/z, [x0, x0, lsl #1]
+ldff1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
+ldff1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
+ldff1h { z0.d }, p0/z, [z0.d]
+ldff1h { z0.h }, p0/z, [x0, x0, lsl #1]
+ldff1h { z0.s }, p0/z, [x0, x0, lsl #1]
+ldff1h { z0.s }, p0/z, [x0, z0.s, sxtw]
+ldff1h { z0.s }, p0/z, [x0, z0.s, uxtw]
+ldff1h { z0.s }, p0/z, [z0.s]
+ldff1h { z21.d }, p5/z, [x10, z21.d, sxtw]
+ldff1h { z21.d }, p5/z, [x10, z21.d, uxtw]
+ldff1h { z23.d }, p3/z, [x13, z8.d, lsl #1]
+ldff1h { z31.d }, p7/z, [sp, z31.d]
+ldff1h { z31.d }, p7/z, [sp]
+ldff1h { z31.d }, p7/z, [z31.d, #62]
+ldff1h { z31.h }, p7/z, [sp]
+ldff1h { z31.s }, p7/z, [sp, z31.s, sxtw #1]
+ldff1h { z31.s }, p7/z, [sp, z31.s, uxtw #1]
+ldff1h { z31.s }, p7/z, [sp]
+ldff1h { z31.s }, p7/z, [z31.s, #62]
+ldff1sb { z0.d }, p0/z, [x0, x0]
+ldff1sb { z0.d }, p0/z, [z0.d]
+ldff1sb { z0.h }, p0/z, [x0, x0]
+ldff1sb { z0.s }, p0/z, [x0, x0]
+ldff1sb { z0.s }, p0/z, [x0, z0.s, sxtw]
+ldff1sb { z0.s }, p0/z, [x0, z0.s, uxtw]
+ldff1sb { z0.s }, p0/z, [z0.s]
+ldff1sb { z21.d }, p5/z, [x10, z21.d, sxtw]
+ldff1sb { z21.d }, p5/z, [x10, z21.d, uxtw]
+ldff1sb { z31.d }, p7/z, [sp, z31.d]
+ldff1sb { z31.d }, p7/z, [sp]
+ldff1sb { z31.d }, p7/z, [z31.d, #31]
+ldff1sb { z31.h }, p7/z, [sp]
+ldff1sb { z31.s }, p7/z, [sp]
+ldff1sb { z31.s }, p7/z, [z31.s, #31]
+ldff1sh { z0.d }, p0/z, [x0, x0, lsl #1]
+ldff1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1]
+ldff1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
+ldff1sh { z0.d }, p0/z, [z0.d]
+ldff1sh { z0.s }, p0/z, [x0, x0, lsl #1]
+ldff1sh { z0.s }, p0/z, [x0, z0.s, sxtw]
+ldff1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
+ldff1sh { z0.s }, p0/z, [z0.s]
+ldff1sh { z21.d }, p5/z, [x10, z21.d, sxtw]
+ldff1sh { z21.d }, p5/z, [x10, z21.d, uxtw]
+ldff1sh { z23.d }, p3/z, [x13, z8.d, lsl #1]
+ldff1sh { z31.d }, p7/z, [sp, z31.d]
+ldff1sh { z31.d }, p7/z, [sp]
+ldff1sh { z31.d }, p7/z, [z31.d, #62]
+ldff1sh { z31.s }, p7/z, [sp, z31.s, sxtw #1]
+ldff1sh { z31.s }, p7/z, [sp, z31.s, uxtw #1]
+ldff1sh { z31.s }, p7/z, [sp]
+ldff1sh { z31.s }, p7/z, [z31.s, #62]
+ldff1sw { z0.d }, p0/z, [x0, x0, lsl #2]
+ldff1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2]
+ldff1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2]
+ldff1sw { z0.d }, p0/z, [z0.d]
+ldff1sw { z21.d }, p5/z, [x10, z21.d, sxtw]
+ldff1sw { z21.d }, p5/z, [x10, z21.d, uxtw]
+ldff1sw { z23.d }, p3/z, [x13, z8.d, lsl #2]
+ldff1sw { z31.d }, p7/z, [sp, z31.d]
+ldff1sw { z31.d }, p7/z, [sp]
+ldff1sw { z31.d }, p7/z, [z31.d, #124]
+ldff1w { z0.d }, p0/z, [x0, x0, lsl #2]
+ldff1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
+ldff1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
+ldff1w { z0.d }, p0/z, [z0.d]
+ldff1w { z0.s }, p0/z, [x0, x0, lsl #2]
+ldff1w { z0.s }, p0/z, [x0, z0.s, sxtw]
+ldff1w { z0.s }, p0/z, [x0, z0.s, uxtw]
+ldff1w { z0.s }, p0/z, [z0.s]
+ldff1w { z21.d }, p5/z, [x10, z21.d, sxtw]
+ldff1w { z21.d }, p5/z, [x10, z21.d, uxtw]
+ldff1w { z23.d }, p3/z, [x13, z8.d, lsl #2]
+ldff1w { z31.d }, p7/z, [sp, z31.d]
+ldff1w { z31.d }, p7/z, [sp]
+ldff1w { z31.d }, p7/z, [z31.d, #124]
+ldff1w { z31.s }, p7/z, [sp, z31.s, sxtw #2]
+ldff1w { z31.s }, p7/z, [sp, z31.s, uxtw #2]
+ldff1w { z31.s }, p7/z, [sp]
+ldff1w { z31.s }, p7/z, [z31.s, #124]
+ldnf1b { z0.b }, p0/z, [x0]
+ldnf1b { z0.d }, p0/z, [x0]
+ldnf1b { z0.h }, p0/z, [x0]
+ldnf1b { z0.s }, p0/z, [x0]
+ldnf1b { z21.b }, p5/z, [x10, #5, mul vl]
+ldnf1b { z21.d }, p5/z, [x10, #5, mul vl]
+ldnf1b { z21.h }, p5/z, [x10, #5, mul vl]
+ldnf1b { z21.s }, p5/z, [x10, #5, mul vl]
+ldnf1b { z31.b }, p7/z, [sp, #-1, mul vl]
+ldnf1b { z31.d }, p7/z, [sp, #-1, mul vl]
+ldnf1b { z31.h }, p7/z, [sp, #-1, mul vl]
+ldnf1b { z31.s }, p7/z, [sp, #-1, mul vl]
+ldnf1d { z0.d }, p0/z, [x0]
+ldnf1d { z21.d }, p5/z, [x10, #5, mul vl]
+ldnf1d { z31.d }, p7/z, [sp, #-1, mul vl]
+ldnf1h { z0.d }, p0/z, [x0]
+ldnf1h { z0.h }, p0/z, [x0]
+ldnf1h { z0.s }, p0/z, [x0]
+ldnf1h { z21.d }, p5/z, [x10, #5, mul vl]
+ldnf1h { z21.h }, p5/z, [x10, #5, mul vl]
+ldnf1h { z21.s }, p5/z, [x10, #5, mul vl]
+ldnf1h { z31.d }, p7/z, [sp, #-1, mul vl]
+ldnf1h { z31.h }, p7/z, [sp, #-1, mul vl]
+ldnf1h { z31.s }, p7/z, [sp, #-1, mul vl]
+ldnf1sb { z0.d }, p0/z, [x0]
+ldnf1sb { z0.h }, p0/z, [x0]
+ldnf1sb { z0.s }, p0/z, [x0]
+ldnf1sb { z21.d }, p5/z, [x10, #5, mul vl]
+ldnf1sb { z21.h }, p5/z, [x10, #5, mul vl]
+ldnf1sb { z21.s }, p5/z, [x10, #5, mul vl]
+ldnf1sb { z31.d }, p7/z, [sp, #-1, mul vl]
+ldnf1sb { z31.h }, p7/z, [sp, #-1, mul vl]
+ldnf1sb { z31.s }, p7/z, [sp, #-1, mul vl]
+ldnf1sh { z0.d }, p0/z, [x0]
+ldnf1sh { z0.s }, p0/z, [x0]
+ldnf1sh { z21.d }, p5/z, [x10, #5, mul vl]
+ldnf1sh { z21.s }, p5/z, [x10, #5, mul vl]
+ldnf1sh { z31.d }, p7/z, [sp, #-1, mul vl]
+ldnf1sh { z31.s }, p7/z, [sp, #-1, mul vl]
+ldnf1sw { z0.d }, p0/z, [x0]
+ldnf1sw { z21.d }, p5/z, [x10, #5, mul vl]
+ldnf1sw { z31.d }, p7/z, [sp, #-1, mul vl]
+ldnf1w { z0.d }, p0/z, [x0]
+ldnf1w { z0.s }, p0/z, [x0]
+ldnf1w { z21.d }, p5/z, [x10, #5, mul vl]
+ldnf1w { z21.s }, p5/z, [x10, #5, mul vl]
+ldnf1w { z31.d }, p7/z, [sp, #-1, mul vl]
+ldnf1w { z31.s }, p7/z, [sp, #-1, mul vl]
+ldnt1b { z0.b }, p0/z, [x0, x0]
+ldnt1b { z0.b }, p0/z, [x0]
+ldnt1b { z0.d }, p0/z, [z1.d]
+ldnt1b { z0.s }, p0/z, [z1.s]
+ldnt1b { z21.b }, p5/z, [x10, #7, mul vl]
+ldnt1b { z23.b }, p3/z, [x13, #-8, mul vl]
+ldnt1b { z31.d }, p7/z, [z31.d, x0]
+ldnt1b { z31.d }, p7/z, [z31.d]
+ldnt1b { z31.s }, p7/z, [z31.s, x0]
+ldnt1b { z31.s }, p7/z, [z31.s]
+ldnt1d { z0.d }, p0/z, [x0, x0, lsl #3]
+ldnt1d { z0.d }, p0/z, [x0]
+ldnt1d { z0.d }, p0/z, [z1.d]
+ldnt1d { z21.d }, p5/z, [x10, #7, mul vl]
+ldnt1d { z23.d }, p3/z, [x13, #-8, mul vl]
+ldnt1d { z31.d }, p7/z, [z31.d, x0]
+ldnt1d { z31.d }, p7/z, [z31.d]
+ldnt1h { z0.d }, p0/z, [z1.d]
+ldnt1h { z0.h }, p0/z, [x0, x0, lsl #1]
+ldnt1h { z0.h }, p0/z, [x0]
+ldnt1h { z0.s }, p0/z, [z1.s]
+ldnt1h { z21.h }, p5/z, [x10, #7, mul vl]
+ldnt1h { z23.h }, p3/z, [x13, #-8, mul vl]
+ldnt1h { z31.d }, p7/z, [z31.d, x0]
+ldnt1h { z31.d }, p7/z, [z31.d]
+ldnt1h { z31.s }, p7/z, [z31.s, x0]
+ldnt1h { z31.s }, p7/z, [z31.s]
+ldnt1sb { z0.d }, p0/z, [z1.d]
+ldnt1sb { z0.s }, p0/z, [z1.s]
+ldnt1sb { z31.d }, p7/z, [z31.d, x0]
+ldnt1sb { z31.d }, p7/z, [z31.d]
+ldnt1sb { z31.s }, p7/z, [z31.s, x0]
+ldnt1sb { z31.s }, p7/z, [z31.s]
+ldnt1sh { z0.d }, p0/z, [z1.d]
+ldnt1sh { z0.s }, p0/z, [z1.s]
+ldnt1sh { z31.d }, p7/z, [z31.d, x0]
+ldnt1sh { z31.d }, p7/z, [z31.d]
+ldnt1sh { z31.s }, p7/z, [z31.s, x0]
+ldnt1sh { z31.s }, p7/z, [z31.s]
+ldnt1sw { z0.d }, p0/z, [z1.d]
+ldnt1sw { z31.d }, p7/z, [z31.d, x0]
+ldnt1sw { z31.d }, p7/z, [z31.d]
+ldnt1w { z0.d }, p0/z, [z1.d]
+ldnt1w { z0.s }, p0/z, [x0, x0, lsl #2]
+ldnt1w { z0.s }, p0/z, [x0]
+ldnt1w { z0.s }, p0/z, [z1.s]
+ldnt1w { z21.s }, p5/z, [x10, #7, mul vl]
+ldnt1w { z23.s }, p3/z, [x13, #-8, mul vl]
+ldnt1w { z31.d }, p7/z, [z31.d, x0]
+ldnt1w { z31.d }, p7/z, [z31.d]
+ldnt1w { z31.s }, p7/z, [z31.s, x0]
+ldnt1w { z31.s }, p7/z, [z31.s]
+ldr p0, [x0]
+ldr p5, [x10, #255, mul vl]
+ldr p7, [x13, #-256, mul vl]
+ldr z0, [x0]
+ldr z23, [x13, #255, mul vl]
+ldr z31, [sp, #-256, mul vl]
+lsl z0.b, p0/m, z0.b, #0
+lsl z0.b, p0/m, z0.b, z0.b
+lsl z0.b, p0/m, z0.b, z1.d
+lsl z0.b, z0.b, #0
+lsl z0.b, z1.b, z2.d
+lsl z0.d, p0/m, z0.d, #0
+lsl z0.d, p0/m, z0.d, z0.d
+lsl z0.d, z0.d, #0
+lsl z0.h, p0/m, z0.h, #0
+lsl z0.h, p0/m, z0.h, z0.h
+lsl z0.h, p0/m, z0.h, z1.d
+lsl z0.h, z0.h, #0
+lsl z0.h, z1.h, z2.d
+lsl z0.s, p0/m, z0.s, #0
+lsl z0.s, p0/m, z0.s, z0.s
+lsl z0.s, p0/m, z0.s, z1.d
+lsl z0.s, z0.s, #0
+lsl z0.s, z1.s, z2.d
+lsl z31.b, p0/m, z31.b, #7
+lsl z31.b, z31.b, #7
+lsl z31.d, p0/m, z31.d, #63
+lsl z31.d, z31.d, #63
+lsl z31.h, p0/m, z31.h, #15
+lsl z31.h, z31.h, #15
+lsl z31.s, p0/m, z31.s, #31
+lsl z31.s, z31.s, #31
+lslr z0.b, p0/m, z0.b, z0.b
+lslr z0.d, p0/m, z0.d, z0.d
+lslr z0.h, p0/m, z0.h, z0.h
+lslr z0.s, p0/m, z0.s, z0.s
+lsr z0.b, p0/m, z0.b, #1
+lsr z0.b, p0/m, z0.b, z0.b
+lsr z0.b, p0/m, z0.b, z1.d
+lsr z0.b, z0.b, #1
+lsr z0.b, z1.b, z2.d
+lsr z0.d, p0/m, z0.d, #1
+lsr z0.d, p0/m, z0.d, z0.d
+lsr z0.d, z0.d, #1
+lsr z0.h, p0/m, z0.h, #1
+lsr z0.h, p0/m, z0.h, z0.h
+lsr z0.h, p0/m, z0.h, z1.d
+lsr z0.h, z0.h, #1
+lsr z0.h, z1.h, z2.d
+lsr z0.s, p0/m, z0.s, #1
+lsr z0.s, p0/m, z0.s, z0.s
+lsr z0.s, p0/m, z0.s, z1.d
+lsr z0.s, z0.s, #1
+lsr z0.s, z1.s, z2.d
+lsr z31.b, p0/m, z31.b, #8
+lsr z31.b, z31.b, #8
+lsr z31.d, p0/m, z31.d, #64
+lsr z31.d, z31.d, #64
+lsr z31.h, p0/m, z31.h, #16
+lsr z31.h, z31.h, #16
+lsr z31.s, p0/m, z31.s, #32
+lsr z31.s, z31.s, #32
+lsrr z0.b, p0/m, z0.b, z0.b
+lsrr z0.d, p0/m, z0.d, z0.d
+lsrr z0.h, p0/m, z0.h, z0.h
+lsrr z0.s, p0/m, z0.s, z0.s
+mad z0.b, p7/m, z1.b, z31.b
+mad z0.d, p7/m, z1.d, z31.d
+mad z0.h, p7/m, z1.h, z31.h
+mad z0.s, p7/m, z1.s, z31.s
+match p0.b, p0/z, z0.b, z0.b
+match p0.h, p0/z, z0.h, z0.h
+match p15.b, p7/z, z30.b, z31.b
+match p15.h, p7/z, z30.h, z31.h
+mla z0.b, p7/m, z1.b, z31.b
+mla z0.d, p7/m, z1.d, z31.d
+mla z0.d, z1.d, z7.d[1]
+mla z0.h, p7/m, z1.h, z31.h
+mla z0.h, z1.h, z7.h[7]
+mla z0.s, p7/m, z1.s, z31.s
+mla z0.s, z1.s, z7.s[3]
+mls z0.b, p7/m, z1.b, z31.b
+mls z0.d, p7/m, z1.d, z31.d
+mls z0.d, z1.d, z7.d[1]
+mls z0.h, p7/m, z1.h, z31.h
+mls z0.h, z1.h, z7.h[7]
+mls z0.s, p7/m, z1.s, z31.s
+mls z0.s, z1.s, z7.s[3]
+mov p0.b, p0.b
+mov p0.b, p0/m, p0.b
+mov p0.b, p0/z, p0.b
+mov p15.b, p15.b
+mov p15.b, p15/m, p15.b
+mov p15.b, p15/z, p15.b
+mov z0.b, #127
+mov z0.b, b0
+mov z0.b, p0/m, b0
+mov z0.b, p0/m, w0
+mov z0.b, p0/z, #127
+mov z0.b, w0
+mov z0.d, #0
+mov z0.d, #0xe0000000000003ff
+mov z0.d, #0xffffffffffff7fff
+mov z0.d, #32768
+mov z0.d, d0
+mov z0.d, p0/m, d0
+mov z0.d, p0/m, x0
+mov z0.d, x0
+mov z0.d, z0.d
+mov z0.h, #-256
+mov z0.h, #-32768
+mov z0.h, #0
+mov z0.h, #32512
+mov z0.h, #32767
+mov z0.h, h0
+mov z0.h, p0/m, h0
+mov z0.h, p0/m, w0
+mov z0.h, p0/z, #32512
+mov z0.h, w0
+mov z0.q, q0
+mov z0.s, #0
+mov z0.s, #0xffff7fff
+mov z0.s, #32768
+mov z0.s, p0/m, s0
+mov z0.s, p0/m, w0
+mov z0.s, s0
+mov z0.s, w0
+mov z21.d, #-128
+mov z21.d, #-32768
+mov z21.d, #127
+mov z21.d, #32512
+mov z21.d, p0/z, #-128
+mov z21.d, p0/z, #-32768
+mov z21.d, p0/z, #127
+mov z21.d, p0/z, #32512
+mov z21.d, p15/m, #-128
+mov z21.d, p15/m, #-32768
+mov z21.h, #-128
+mov z21.h, #-32768
+mov z21.h, #127
+mov z21.h, #32512
+mov z21.h, p0/z, #-128
+mov z21.h, p0/z, #-32768
+mov z21.h, p0/z, #127
+mov z21.h, p0/z, #32512
+mov z21.h, p15/m, #-128
+mov z21.h, p15/m, #-32768
+mov z21.s, #-128
+mov z21.s, #-32768
+mov z21.s, #127
+mov z21.s, #32512
+mov z21.s, p0/z, #-128
+mov z21.s, p0/z, #-32768
+mov z21.s, p0/z, #127
+mov z21.s, p0/z, #32512
+mov z21.s, p15/m, #-128
+mov z21.s, p15/m, #-32768
+mov z31.b, p15/m, z31.b
+mov z31.b, p7/m, b31
+movprfx z31, z6
+mov z31.b, p7/m, wsp
+mov z31.b, wsp
+mov z31.b, z31.b[63]
+mov z31.d, p15/m, z31.d
+mov z31.d, p7/m, d31
+movprfx z31.d, p7/z, z6.d
+mov z31.d, p7/m, sp
+mov z31.d, sp
+mov z31.d, z0.d
+mov z31.d, z31.d[7]
+mov z31.h, p15/m, z31.h
+mov z31.h, p7/m, h31
+mov z31.h, p7/m, wsp
+mov z31.h, wsp
+mov z31.h, z31.h[31]
+mov z31.s, p15/m, z31.s
+mov z31.s, p7/m, s31
+mov z31.s, p7/m, wsp
+mov z31.s, wsp
+mov z31.s, z31.s[15]
+mov z5.b, #-1
+mov z5.b, #-128
+mov z5.b, #127
+mov z5.b, p0/z, #-1
+mov z5.b, p0/z, #-128
+mov z5.b, p0/z, #127
+mov z5.b, p15/m, #-128
+mov z5.d, #-6
+mov z5.h, #-6
+mov z5.q, z17.q[3]
+mov z5.s, #-6
+movs p0.b, p0.b
+movs p0.b, p0/z, p0.b
+movs p15.b, p15.b
+movs p15.b, p15/z, p15.b
+mrs x3, ID_AA64ZFR0_EL1
+mrs x3, ZCR_EL1
+mrs x3, ZCR_EL12
+mrs x3, ZCR_EL2
+mrs x3, ZCR_EL3
+msb z0.b, p7/m, z1.b, z31.b
+msb z0.d, p7/m, z1.d, z31.d
+msb z0.h, p7/m, z1.h, z31.h
+msb z0.s, p7/m, z1.s, z31.s
+msr ZCR_EL1, x3
+msr ZCR_EL12, x3
+msr ZCR_EL2, x3
+msr ZCR_EL3, x3
+mul z0.b, p7/m, z0.b, z31.b
+mul z0.b, z1.b, z2.b
+mul z0.d, p7/m, z0.d, z31.d
+mul z0.d, z1.d, z15.d[1]
+mul z0.h, p7/m, z0.h, z31.h
+mul z0.h, z1.h, z2.h
+mul z0.h, z1.h, z7.h[7]
+mul z0.s, p7/m, z0.s, z31.s
+mul z0.s, z1.s, z7.s[3]
+mul z29.s, z30.s, z31.s
+mul z31.b, z31.b, #-128
+mul z31.b, z31.b, #127
+mul z31.d, z31.d, #-128
+mul z31.d, z31.d, #127
+mul z31.d, z31.d, z31.d
+mul z31.h, z31.h, #-128
+mul z31.h, z31.h, #127
+mul z31.s, z31.s, #-128
+mul z31.s, z31.s, #127
+nand p0.b, p0/z, p0.b, p0.b
+nand p15.b, p15/z, p15.b, p15.b
+nands p0.b, p0/z, p0.b, p0.b
+nands p15.b, p15/z, p15.b, p15.b
+nbsl z0.d, z0.d, z1.d, z2.d
+neg z0.b, p0/m, z0.b
+neg z0.d, p0/m, z0.d
+neg z0.h, p0/m, z0.h
+neg z0.s, p0/m, z0.s
+neg z31.b, p7/m, z31.b
+neg z31.d, p7/m, z31.d
+neg z31.h, p7/m, z31.h
+neg z31.s, p7/m, z31.s
+nmatch p0.b, p0/z, z0.b, z0.b
+nmatch p0.h, p0/z, z0.h, z0.h
+nmatch p15.b, p7/z, z30.b, z31.b
+nmatch p15.h, p7/z, z30.h, z31.h
+nor p0.b, p0/z, p0.b, p0.b
+nor p15.b, p15/z, p15.b, p15.b
+nors p0.b, p0/z, p0.b, p0.b
+nors p15.b, p15/z, p15.b, p15.b
+not p0.b, p0/z, p0.b
+not p15.b, p15/z, p15.b
+not z31.b, p7/m, z31.b
+not z31.d, p7/m, z31.d
+not z31.h, p7/m, z31.h
+not z31.s, p7/m, z31.s
+nots p0.b, p0/z, p0.b
+nots p15.b, p15/z, p15.b
+orn p0.b, p0/z, p0.b, p0.b
+orn p15.b, p15/z, p15.b, p15.b
+orns p0.b, p0/z, p0.b, p0.b
+orns p15.b, p15/z, p15.b, p15.b
+orr p0.b, p0/z, p0.b, p1.b
+orr z0.d, z0.d, #0x6
+orr z0.d, z0.d, #0xfffffffffffffff9
+orr z0.s, z0.s, #0x6
+orr z0.s, z0.s, #0xfffffff9
+orr z23.d, z13.d, z8.d
+orr z23.h, z23.h, #0x6
+orr z23.h, z23.h, #0xfff9
+orr z31.b, p7/m, z31.b, z31.b
+orr z31.d, p7/m, z31.d, z31.d
+orr z31.h, p7/m, z31.h, z31.h
+orr z31.s, p7/m, z31.s, z31.s
+orr z5.b, z5.b, #0x6
+orr z5.b, z5.b, #0xf9
+orrs p0.b, p0/z, p0.b, p1.b
+orv b0, p7, z31.b
+orv d0, p7, z31.d
+orv h0, p7, z31.h
+orv s0, p7, z31.s
+pfalse p15.b
+pfirst p0.b, p15, p0.b
+pfirst p15.b, p15, p15.b
+pmul z0.b, z1.b, z2.b
+pmul z29.b, z30.b, z31.b
+pmullb z0.h, z1.b, z2.b
+pmullb z29.q, z30.d, z31.d
+pmullb z31.d, z31.s, z31.s
+pmullt z0.h, z1.b, z2.b
+pmullt z29.q, z30.d, z31.d
+pmullt z31.d, z31.s, z31.s
+pnext p0.b, p15, p0.b
+pnext p0.d, p15, p0.d
+pnext p0.h, p15, p0.h
+pnext p0.s, p15, p0.s
+pnext p15.b, p15, p15.b
+prfb #14, p0, [x0]
+prfb #15, p0, [x0]
+prfb #6, p0, [x0]
+prfb #7, p0, [x0]
+prfb #7, p3, [z13.s, #31]
+prfb #7, p3, [z13.s]
+prfb pldl1keep, p0, [x0, z0.d, uxtw]
+prfb pldl1keep, p0, [x0, z0.d]
+prfb pldl1keep, p0, [x0, z0.s, uxtw]
+prfb pldl1keep, p0, [x0]
+prfb pldl1strm, p0, [x0, #-32, mul vl]
+prfb pldl1strm, p0, [x0, #31, mul vl]
+prfb pldl1strm, p0, [x0]
+prfb pldl2keep, p0, [x0]
+prfb pldl2strm, p0, [x0]
+prfb pldl3keep, p0, [x0]
+prfb pldl3strm, p0, [x0]
+prfb pldl3strm, p5, [x10, z21.d, sxtw]
+prfb pldl3strm, p5, [x10, z21.s, uxtw]
+prfb pldl3strm, p5, [z10.d, #31]
+prfb pldl3strm, p5, [z10.d]
+prfb pstl1keep, p0, [x0]
+prfb pstl1strm, p0, [x0]
+prfb pstl2keep, p0, [x0]
+prfb pstl2strm, p0, [x0]
+prfb pstl3keep, p0, [x0]
+prfb pstl3strm, p0, [x0]
+prfd #14, p0, [x0]
+prfd #15, p0, [x0]
+prfd #15, p7, [z31.d, #248]
+prfd #15, p7, [z31.d]
+prfd #15, p7, [z31.s, #248]
+prfd #15, p7, [z31.s]
+prfd #6, p0, [x0]
+prfd #7, p0, [x0]
+prfd pldl1keep, p0, [x0, z0.d, lsl #3]
+prfd pldl1keep, p0, [x0, z0.d, sxtw #3]
+prfd pldl1keep, p0, [x0, z0.d, uxtw #3]
+prfd pldl1keep, p0, [x0, z0.s, sxtw #3]
+prfd pldl1keep, p0, [x0, z0.s, uxtw #3]
+prfd pldl1keep, p0, [x0]
+prfd pldl1strm, p0, [x0, #-32, mul vl]
+prfd pldl1strm, p0, [x0, #31, mul vl]
+prfd pldl1strm, p0, [x0]
+prfd pldl2keep, p0, [x0]
+prfd pldl2strm, p0, [x0]
+prfd pldl3keep, p0, [x0]
+prfd pldl3strm, p0, [x0]
+prfd pstl1keep, p0, [x0]
+prfd pstl1strm, p0, [x0]
+prfd pstl2keep, p0, [x0]
+prfd pstl2strm, p0, [x0]
+prfd pstl3keep, p0, [x0]
+prfd pstl3strm, p0, [x0]
+prfh #14, p0, [x0]
+prfh #15, p0, [x0]
+prfh #15, p7, [z31.d, #62]
+prfh #15, p7, [z31.d]
+prfh #15, p7, [z31.s, #62]
+prfh #15, p7, [z31.s]
+prfh #6, p0, [x0]
+prfh #7, p0, [x0]
+prfh pldl1keep, p0, [x0, z0.d, lsl #1]
+prfh pldl1keep, p0, [x0]
+prfh pldl1strm, p0, [x0, #-32, mul vl]
+prfh pldl1strm, p0, [x0, #31, mul vl]
+prfh pldl1strm, p0, [x0]
+prfh pldl2keep, p0, [x0]
+prfh pldl2strm, p0, [x0]
+prfh pldl3keep, p0, [x0]
+prfh pldl3strm, p0, [x0]
+prfh pldl3strm, p5, [x10, z21.d, sxtw #1]
+prfh pldl3strm, p5, [x10, z21.d, uxtw #1]
+prfh pldl3strm, p5, [x10, z21.s, sxtw #1]
+prfh pldl3strm, p5, [x10, z21.s, uxtw #1]
+prfh pstl1keep, p0, [x0]
+prfh pstl1strm, p0, [x0]
+prfh pstl2keep, p0, [x0]
+prfh pstl2strm, p0, [x0]
+prfh pstl3keep, p0, [x0]
+prfh pstl3strm, p0, [x0]
+prfw #14, p0, [x0]
+prfw #15, p0, [x0]
+prfw #15, p7, [z31.d, #124]
+prfw #15, p7, [z31.d]
+prfw #15, p7, [z31.s, #124]
+prfw #15, p7, [z31.s]
+prfw #6, p0, [x0]
+prfw #7, p0, [x0]
+prfw #7, p3, [x13, z8.d, uxtw #2]
+prfw pldl1keep, p0, [x0, z0.d, sxtw #2]
+prfw pldl1keep, p0, [x0, z0.s, uxtw #2]
+prfw pldl1keep, p0, [x0]
+prfw pldl1strm, p0, [x0, #-32, mul vl]
+prfw pldl1strm, p0, [x0, #31, mul vl]
+prfw pldl1strm, p0, [x0]
+prfw pldl2keep, p0, [x0]
+prfw pldl2strm, p0, [x0]
+prfw pldl3keep, p0, [x0]
+prfw pldl3strm, p0, [x0]
+prfw pldl3strm, p5, [x10, z21.d, lsl #2]
+prfw pldl3strm, p5, [x10, z21.s, sxtw #2]
+prfw pstl1keep, p0, [x0]
+prfw pstl1strm, p0, [x0]
+prfw pstl2keep, p0, [x0]
+prfw pstl2strm, p0, [x0]
+prfw pstl3keep, p0, [x0]
+prfw pstl3strm, p0, [x0]
+ptest p15, p0.b
+ptest p15, p15.b
+ptrue p0.b, pow2
+ptrue p0.d, pow2
+ptrue p0.h, pow2
+ptrue p0.s, pow2
+ptrue p15.b
+ptrue p15.d
+ptrue p15.h
+ptrue p15.s
+ptrue p7.s
+ptrue p7.s, #14
+ptrue p7.s, #15
+ptrue p7.s, #16
+ptrue p7.s, #17
+ptrue p7.s, #18
+ptrue p7.s, #19
+ptrue p7.s, #20
+ptrue p7.s, #21
+ptrue p7.s, #22
+ptrue p7.s, #23
+ptrue p7.s, #24
+ptrue p7.s, #25
+ptrue p7.s, #26
+ptrue p7.s, #27
+ptrue p7.s, #28
+ptrue p7.s, mul3
+ptrue p7.s, mul4
+ptrue p7.s, vl1
+ptrue p7.s, vl128
+ptrue p7.s, vl16
+ptrue p7.s, vl2
+ptrue p7.s, vl256
+ptrue p7.s, vl3
+ptrue p7.s, vl32
+ptrue p7.s, vl4
+ptrue p7.s, vl5
+ptrue p7.s, vl6
+ptrue p7.s, vl64
+ptrue p7.s, vl7
+ptrue p7.s, vl8
+ptrues p0.b, pow2
+ptrues p0.d, pow2
+ptrues p0.h, pow2
+ptrues p0.s, pow2
+ptrues p15.b
+ptrues p15.d
+ptrues p15.h
+ptrues p15.s
+ptrues p7.s
+ptrues p7.s, #14
+ptrues p7.s, #15
+ptrues p7.s, #16
+ptrues p7.s, #17
+ptrues p7.s, #18
+ptrues p7.s, #19
+ptrues p7.s, #20
+ptrues p7.s, #21
+ptrues p7.s, #22
+ptrues p7.s, #23
+ptrues p7.s, #24
+ptrues p7.s, #25
+ptrues p7.s, #26
+ptrues p7.s, #27
+ptrues p7.s, #28
+ptrues p7.s, mul3
+ptrues p7.s, mul4
+ptrues p7.s, vl1
+ptrues p7.s, vl128
+ptrues p7.s, vl16
+ptrues p7.s, vl2
+ptrues p7.s, vl256
+ptrues p7.s, vl3
+ptrues p7.s, vl32
+ptrues p7.s, vl4
+ptrues p7.s, vl5
+ptrues p7.s, vl6
+ptrues p7.s, vl64
+ptrues p7.s, vl7
+ptrues p7.s, vl8
+punpkhi p0.h, p0.b
+punpkhi p15.h, p15.b
+punpklo p0.h, p0.b
+punpklo p15.h, p15.b
+raddhnb z0.b, z1.h, z31.h
+raddhnb z0.h, z1.s, z31.s
+raddhnb z0.s, z1.d, z31.d
+raddhnt z0.b, z1.h, z31.h
+raddhnt z0.h, z1.s, z31.s
+raddhnt z0.s, z1.d, z31.d
+rax1 z0.d, z1.d, z31.d
+rbit z0.b, p7/m, z31.b
+rbit z0.d, p7/m, z31.d
+rbit z0.h, p7/m, z31.h
+rbit z0.s, p7/m, z31.s
+rdffr p0.b
+rdffr p0.b, p0/z
+rdffr p15.b
+rdffr p15.b, p15/z
+rdffrs p0.b, p0/z
+rdffrs p15.b, p15/z
+rdvl x0, #0
+rdvl x21, #-32
+rdvl x23, #31
+rdvl xzr, #-1
+rev p0.b, p1.b
+rev p0.d, p1.d
+rev p0.h, p1.h
+rev p0.s, p1.s
+rev z0.b, z31.b
+rev z0.d, z31.d
+rev z0.h, z31.h
+rev z0.s, z31.s
+revb z0.d, p7/m, z31.d
+revb z0.h, p7/m, z31.h
+revb z0.s, p7/m, z31.s
+revh z0.d, p7/m, z31.d
+revh z0.s, p7/m, z31.s
+revw z0.d, p7/m, z31.d
+rshrnb z0.b, z0.h, #1
+rshrnb z0.h, z0.s, #1
+rshrnb z0.s, z0.d, #1
+rshrnb z31.b, z31.h, #8
+rshrnb z31.h, z31.s, #16
+rshrnb z31.s, z31.d, #32
+rshrnt z0.b, z0.h, #1
+rshrnt z0.h, z0.s, #1
+rshrnt z0.s, z0.d, #1
+rshrnt z31.b, z31.h, #8
+rshrnt z31.h, z31.s, #16
+rshrnt z31.s, z31.d, #32
+rsubhnb z0.b, z1.h, z31.h
+rsubhnb z0.h, z1.s, z31.s
+rsubhnb z0.s, z1.d, z31.d
+rsubhnt z0.b, z1.h, z31.h
+rsubhnt z0.h, z1.s, z31.s
+rsubhnt z0.s, z1.d, z31.d
+saba z0.b, z1.b, z31.b
+saba z0.d, z1.d, z31.d
+saba z0.h, z1.h, z31.h
+saba z0.s, z1.s, z31.s
+sabalb z0.d, z1.s, z31.s
+sabalb z0.h, z1.b, z31.b
+sabalb z0.s, z1.h, z31.h
+sabalt z0.d, z1.s, z31.s
+sabalt z0.h, z1.b, z31.b
+sabalt z0.s, z1.h, z31.h
+sabd z31.b, p7/m, z31.b, z31.b
+sabd z31.d, p7/m, z31.d, z31.d
+sabd z31.h, p7/m, z31.h, z31.h
+sabd z31.s, p7/m, z31.s, z31.s
+sabdlb z0.h, z1.b, z2.b
+sabdlb z29.s, z30.h, z31.h
+sabdlb z31.d, z31.s, z31.s
+sabdlt z0.h, z1.b, z2.b
+sabdlt z29.s, z30.h, z31.h
+sabdlt z31.d, z31.s, z31.s
+sadalp z0.h, p0/m, z1.b
+sadalp z29.s, p0/m, z30.h
+sadalp z30.d, p7/m, z31.s
+saddlb z0.h, z1.b, z2.b
+saddlb z29.s, z30.h, z31.h
+saddlb z31.d, z31.s, z31.s
+saddlbt z0.d, z1.s, z31.s
+saddlbt z0.h, z1.b, z31.b
+saddlbt z0.s, z1.h, z31.h
+saddlt z0.h, z1.b, z2.b
+saddlt z29.s, z30.h, z31.h
+saddlt z31.d, z31.s, z31.s
+saddv d0, p7, z31.b
+saddv d0, p7, z31.h
+saddv d0, p7, z31.s
+saddwb z0.h, z1.h, z2.b
+saddwb z29.s, z30.s, z31.h
+saddwb z31.d, z31.d, z31.s
+saddwt z0.h, z1.h, z2.b
+saddwt z29.s, z30.s, z31.h
+saddwt z31.d, z31.d, z31.s
+sbclb z0.d, z1.d, z31.d
+sbclb z0.s, z1.s, z31.s
+sbclt z0.d, z1.d, z31.d
+sbclt z0.s, z1.s, z31.s
+scvtf z0.d, p0/m, z0.d
+scvtf z0.d, p0/m, z0.s
+scvtf z0.h, p0/m, z0.d
+scvtf z0.h, p0/m, z0.h
+scvtf z0.h, p0/m, z0.s
+scvtf z0.s, p0/m, z0.d
+scvtf z0.s, p0/m, z0.s
+sdiv z0.d, p7/m, z0.d, z31.d
+sdiv z0.s, p7/m, z0.s, z31.s
+sdivr z0.d, p7/m, z0.d, z31.d
+sdivr z0.s, p7/m, z0.s, z31.s
+sdot z0.d, z1.h, z15.h[1]
+sdot z0.d, z1.h, z31.h
+sdot z0.s, z1.b, z31.b
+sdot z0.s, z1.b, z7.b[3]
+sel p0.b, p1, p2.b, p3.b
+sel z23.b, p11, z13.b, z8.b
+sel z23.d, p11, z13.d, z8.d
+sel z23.h, p11, z13.h, z8.h
+sel z23.s, p11, z13.s, z8.s
+setffr
+shadd z0.b, p0/m, z0.b, z1.b
+shadd z0.h, p0/m, z0.h, z1.h
+shadd z29.s, p7/m, z29.s, z30.s
+shadd z31.d, p7/m, z31.d, z30.d
+shrnb z0.b, z0.h, #1
+shrnb z0.h, z0.s, #1
+shrnb z0.s, z0.d, #1
+shrnb z31.b, z31.h, #8
+shrnb z31.h, z31.s, #16
+shrnb z31.s, z31.d, #32
+shrnt z0.b, z0.h, #1
+shrnt z0.h, z0.s, #1
+shrnt z0.s, z0.d, #1
+shrnt z31.b, z31.h, #8
+shrnt z31.h, z31.s, #16
+shrnt z31.s, z31.d, #32
+shsub z0.b, p0/m, z0.b, z1.b
+shsub z0.h, p0/m, z0.h, z1.h
+shsub z29.s, p7/m, z29.s, z30.s
+shsub z31.d, p7/m, z31.d, z30.d
+shsubr z0.b, p0/m, z0.b, z1.b
+shsubr z0.h, p0/m, z0.h, z1.h
+shsubr z29.s, p7/m, z29.s, z30.s
+shsubr z31.d, p7/m, z31.d, z30.d
+sli z0.b, z0.b, #0
+sli z0.d, z0.d, #0
+sli z0.h, z0.h, #0
+sli z0.s, z0.s, #0
+sli z31.b, z31.b, #7
+sli z31.d, z31.d, #63
+sli z31.h, z31.h, #15
+sli z31.s, z31.s, #31
+sm4e z0.s, z0.s, z31.s
+sm4ekey z0.s, z1.s, z31.s
+smax z0.b, z0.b, #-128
+smax z0.d, z0.d, #-128
+smax z0.h, z0.h, #-128
+smax z0.s, z0.s, #-128
+smax z31.b, p7/m, z31.b, z31.b
+smax z31.b, z31.b, #127
+smax z31.d, p7/m, z31.d, z31.d
+smax z31.d, z31.d, #127
+smax z31.h, p7/m, z31.h, z31.h
+smax z31.h, z31.h, #127
+smax z31.s, p7/m, z31.s, z31.s
+smax z31.s, z31.s, #127
+smaxp z0.b, p0/m, z0.b, z1.b
+smaxp z0.h, p0/m, z0.h, z1.h
+smaxp z29.s, p7/m, z29.s, z30.s
+smaxp z31.d, p7/m, z31.d, z30.d
+smaxv b0, p7, z31.b
+smaxv d0, p7, z31.d
+smaxv h0, p7, z31.h
+smaxv s0, p7, z31.s
+smin z0.b, z0.b, #-128
+smin z0.d, z0.d, #-128
+smin z0.h, z0.h, #-128
+smin z0.s, z0.s, #-128
+smin z31.b, p7/m, z31.b, z31.b
+smin z31.b, z31.b, #127
+smin z31.d, p7/m, z31.d, z31.d
+smin z31.d, z31.d, #127
+smin z31.h, p7/m, z31.h, z31.h
+smin z31.h, z31.h, #127
+smin z31.s, p7/m, z31.s, z31.s
+smin z31.s, z31.s, #127
+sminp z0.b, p0/m, z0.b, z1.b
+sminp z0.h, p0/m, z0.h, z1.h
+sminp z29.s, p7/m, z29.s, z30.s
+sminp z31.d, p7/m, z31.d, z30.d
+sminv b0, p7, z31.b
+sminv d0, p7, z31.d
+sminv h0, p7, z31.h
+sminv s0, p7, z31.s
+smlalb z0.d, z1.s, z15.s[1]
+smlalb z0.d, z1.s, z31.s
+smlalb z0.h, z1.b, z31.b
+smlalb z0.s, z1.h, z31.h
+smlalb z0.s, z1.h, z7.h[7]
+smlalt z0.d, z1.s, z15.s[1]
+smlalt z0.d, z1.s, z31.s
+smlalt z0.h, z1.b, z31.b
+smlalt z0.s, z1.h, z31.h
+smlalt z0.s, z1.h, z7.h[7]
+smlslb z0.d, z1.s, z15.s[1]
+smlslb z0.d, z1.s, z31.s
+smlslb z0.h, z1.b, z31.b
+smlslb z0.s, z1.h, z31.h
+smlslb z0.s, z1.h, z7.h[7]
+smlslt z0.d, z1.s, z15.s[1]
+smlslt z0.d, z1.s, z31.s
+smlslt z0.h, z1.b, z31.b
+smlslt z0.s, z1.h, z31.h
+smlslt z0.s, z1.h, z7.h[7]
+smmla z0.s, z1.b, z2.b
+smulh z0.b, p7/m, z0.b, z31.b
+smulh z0.b, z1.b, z2.b
+smulh z0.d, p7/m, z0.d, z31.d
+smulh z0.h, p7/m, z0.h, z31.h
+smulh z0.h, z1.h, z2.h
+smulh z0.s, p7/m, z0.s, z31.s
+smulh z29.s, z30.s, z31.s
+smulh z31.d, z31.d, z31.d
+smullb z0.d, z1.s, z15.s[1]
+smullb z0.h, z1.b, z2.b
+smullb z0.s, z1.h, z7.h[7]
+smullb z29.s, z30.h, z31.h
+smullb z31.d, z31.s, z31.s
+smullt z0.d, z1.s, z15.s[1]
+smullt z0.h, z1.b, z2.b
+smullt z0.s, z1.h, z7.h[7]
+smullt z29.s, z30.h, z31.h
+smullt z31.d, z31.s, z31.s
+splice z29.b, p7, { z30.b, z31.b }
+splice z29.d, p7, { z30.d, z31.d }
+splice z29.h, p7, { z30.h, z31.h }
+splice z29.s, p7, { z30.s, z31.s }
+splice z31.b, p7, z31.b, z31.b
+splice z31.d, p7, z31.d, z31.d
+splice z31.h, p7, z31.h, z31.h
+splice z31.s, p7, z31.s, z31.s
+sqabs z31.b, p7/m, z31.b
+sqabs z31.d, p7/m, z31.d
+sqabs z31.h, p7/m, z31.h
+sqabs z31.s, p7/m, z31.s
+sqadd z0.b, p0/m, z0.b, z1.b
+sqadd z0.b, z0.b, #0
+sqadd z0.b, z0.b, z0.b
+sqadd z0.d, z0.d, #0
+sqadd z0.d, z0.d, #0, lsl #8
+sqadd z0.d, z0.d, z0.d
+sqadd z0.h, p0/m, z0.h, z1.h
+sqadd z0.h, z0.h, #0
+sqadd z0.h, z0.h, #0, lsl #8
+sqadd z0.h, z0.h, z0.h
+sqadd z0.s, z0.s, #0
+sqadd z0.s, z0.s, #0, lsl #8
+sqadd z0.s, z0.s, z0.s
+sqadd z29.s, p7/m, z29.s, z30.s
+sqadd z31.b, z31.b, #255
+sqadd z31.d, p7/m, z31.d, z30.d
+sqadd z31.d, z31.d, #65280
+sqadd z31.h, z31.h, #65280
+sqadd z31.s, z31.s, #65280
+sqcadd z0.b, z0.b, z0.b, #90
+sqcadd z0.d, z0.d, z0.d, #90
+sqcadd z0.h, z0.h, z0.h, #90
+sqcadd z0.s, z0.s, z0.s, #90
+sqcadd z31.b, z31.b, z31.b, #270
+sqcadd z31.d, z31.d, z31.d, #270
+sqcadd z31.h, z31.h, z31.h, #270
+sqcadd z31.s, z31.s, z31.s, #270
+sqdecb x0
+sqdecb x0, #14
+sqdecb x0, all, mul #16
+sqdecb x0, pow2
+sqdecb x0, vl1
+sqdecb x0, w0
+sqdecb x0, w0, all, mul #16
+sqdecb x0, w0, pow2
+sqdecb x0, w0, pow2, mul #16
+sqdecd x0
+sqdecd x0, #14
+sqdecd x0, all, mul #16
+sqdecd x0, pow2
+sqdecd x0, vl1
+sqdecd x0, w0
+sqdecd x0, w0, all, mul #16
+sqdecd x0, w0, pow2
+sqdecd x0, w0, pow2, mul #16
+sqdecd z0.d
+sqdecd z0.d, all, mul #16
+sqdecd z0.d, pow2
+sqdecd z0.d, pow2, mul #16
+sqdech x0
+sqdech x0, #14
+sqdech x0, all, mul #16
+sqdech x0, pow2
+sqdech x0, vl1
+sqdech x0, w0
+sqdech x0, w0, all, mul #16
+sqdech x0, w0, pow2
+sqdech x0, w0, pow2, mul #16
+sqdech z0.h
+sqdech z0.h, all, mul #16
+sqdech z0.h, pow2
+sqdech z0.h, pow2, mul #16
+sqdecp x0, p0.b
+sqdecp x0, p0.d
+sqdecp x0, p0.h
+sqdecp x0, p0.s
+sqdecp xzr, p15.b, wzr
+sqdecp xzr, p15.d, wzr
+sqdecp xzr, p15.h, wzr
+sqdecp xzr, p15.s, wzr
+sqdecp z0.d, p0.d
+sqdecp z0.h, p0.h
+sqdecp z0.s, p0.s
+sqdecw x0
+sqdecw x0, #14
+sqdecw x0, all, mul #16
+sqdecw x0, pow2
+sqdecw x0, vl1
+sqdecw x0, w0
+sqdecw x0, w0, all, mul #16
+sqdecw x0, w0, pow2
+sqdecw x0, w0, pow2, mul #16
+sqdecw z0.s
+sqdecw z0.s, all, mul #16
+sqdecw z0.s, pow2
+sqdecw z0.s, pow2, mul #16
+sqdmlalb z0.d, z1.s, z15.s[3]
+sqdmlalb z0.d, z1.s, z31.s
+sqdmlalb z0.h, z1.b, z31.b
+sqdmlalb z0.s, z1.h, z31.h
+sqdmlalb z0.s, z1.h, z7.h[7]
+sqdmlalbt z0.d, z1.s, z31.s
+sqdmlalbt z0.h, z1.b, z31.b
+sqdmlalbt z0.s, z1.h, z31.h
+sqdmlalt z0.d, z1.s, z15.s[3]
+sqdmlalt z0.d, z1.s, z31.s
+sqdmlalt z0.h, z1.b, z31.b
+sqdmlalt z0.s, z1.h, z31.h
+sqdmlalt z0.s, z1.h, z7.h[7]
+sqdmlslb z0.d, z1.s, z15.s[3]
+sqdmlslb z0.d, z1.s, z31.s
+sqdmlslb z0.h, z1.b, z31.b
+sqdmlslb z0.s, z1.h, z31.h
+sqdmlslb z0.s, z1.h, z7.h[7]
+sqdmlslbt z0.d, z1.s, z31.s
+sqdmlslbt z0.h, z1.b, z31.b
+sqdmlslbt z0.s, z1.h, z31.h
+sqdmlslt z0.d, z1.s, z15.s[3]
+sqdmlslt z0.d, z1.s, z31.s
+sqdmlslt z0.h, z1.b, z31.b
+sqdmlslt z0.s, z1.h, z31.h
+sqdmlslt z0.s, z1.h, z7.h[7]
+sqdmulh z0.b, z1.b, z2.b
+sqdmulh z0.d, z1.d, z15.d[1]
+sqdmulh z0.h, z1.h, z2.h
+sqdmulh z0.h, z1.h, z7.h[7]
+sqdmulh z0.s, z1.s, z7.s[3]
+sqdmulh z29.s, z30.s, z31.s
+sqdmulh z31.d, z31.d, z31.d
+sqdmullb z0.d, z1.s, z15.s[1]
+sqdmullb z0.h, z1.b, z2.b
+sqdmullb z0.s, z1.h, z7.h[7]
+sqdmullb z29.s, z30.h, z31.h
+sqdmullb z31.d, z31.s, z31.s
+sqdmullt z0.d, z1.s, z15.s[1]
+sqdmullt z0.h, z1.b, z2.b
+sqdmullt z0.s, z1.h, z7.h[7]
+sqdmullt z29.s, z30.h, z31.h
+sqdmullt z31.d, z31.s, z31.s
+sqincb x0
+sqincb x0, #14
+sqincb x0, all, mul #16
+sqincb x0, pow2
+sqincb x0, vl1
+sqincb x0, w0
+sqincb x0, w0, all, mul #16
+sqincb x0, w0, pow2
+sqincb x0, w0, pow2, mul #16
+sqincd x0
+sqincd x0, #14
+sqincd x0, all, mul #16
+sqincd x0, pow2
+sqincd x0, vl1
+sqincd x0, w0
+sqincd x0, w0, all, mul #16
+sqincd x0, w0, pow2
+sqincd x0, w0, pow2, mul #16
+sqincd z0.d
+sqincd z0.d, all, mul #16
+sqincd z0.d, pow2
+sqincd z0.d, pow2, mul #16
+sqinch x0
+sqinch x0, #14
+sqinch x0, all, mul #16
+sqinch x0, pow2
+sqinch x0, vl1
+sqinch x0, w0
+sqinch x0, w0, all, mul #16
+sqinch x0, w0, pow2
+sqinch x0, w0, pow2, mul #16
+sqinch z0.h
+sqinch z0.h, all, mul #16
+sqinch z0.h, pow2
+sqinch z0.h, pow2, mul #16
+sqincp x0, p0.b
+sqincp x0, p0.d
+sqincp x0, p0.h
+sqincp x0, p0.s
+sqincp xzr, p15.b, wzr
+sqincp xzr, p15.d, wzr
+sqincp xzr, p15.h, wzr
+sqincp xzr, p15.s, wzr
+sqincp z0.d, p0.d
+sqincp z0.h, p0.h
+sqincp z0.s, p0.s
+sqincw x0
+sqincw x0, #14
+sqincw x0, all, mul #16
+sqincw x0, pow2
+sqincw x0, vl1
+sqincw x0, w0
+sqincw x0, w0, all, mul #16
+sqincw x0, w0, pow2
+sqincw x0, w0, pow2, mul #16
+sqincw z0.s
+sqincw z0.s, all, mul #16
+sqincw z0.s, pow2
+sqincw z0.s, pow2, mul #16
+sqneg z31.b, p7/m, z31.b
+sqneg z31.d, p7/m, z31.d
+sqneg z31.h, p7/m, z31.h
+sqneg z31.s, p7/m, z31.s
+sqrdcmlah z0.b, z1.b, z2.b, #0
+sqrdcmlah z0.d, z1.d, z2.d, #0
+sqrdcmlah z0.h, z1.h, z2.h, #0
+sqrdcmlah z0.h, z1.h, z2.h[0], #0
+sqrdcmlah z0.s, z1.s, z2.s, #0
+sqrdcmlah z0.s, z1.s, z2.s[0], #0
+sqrdcmlah z15.b, z16.b, z17.b, #270
+sqrdcmlah z15.d, z16.d, z17.d, #270
+sqrdcmlah z15.h, z16.h, z17.h, #270
+sqrdcmlah z15.s, z16.s, z17.s, #270
+sqrdcmlah z29.b, z30.b, z31.b, #90
+sqrdcmlah z29.d, z30.d, z31.d, #90
+sqrdcmlah z29.h, z30.h, z31.h, #90
+sqrdcmlah z29.s, z30.s, z31.s, #90
+sqrdcmlah z31.b, z31.b, z31.b, #180
+sqrdcmlah z31.d, z31.d, z31.d, #180
+sqrdcmlah z31.h, z30.h, z7.h[0], #180
+sqrdcmlah z31.h, z31.h, z31.h, #180
+sqrdcmlah z31.s, z30.s, z7.s[0], #180
+sqrdcmlah z31.s, z31.s, z31.s, #180
+sqrdmlah z0.b, z1.b, z31.b
+sqrdmlah z0.d, z1.d, z15.d[1]
+sqrdmlah z0.d, z1.d, z31.d
+sqrdmlah z0.h, z1.h, z31.h
+sqrdmlah z0.h, z1.h, z7.h[7]
+sqrdmlah z0.s, z1.s, z31.s
+sqrdmlah z0.s, z1.s, z7.s[3]
+sqrdmlsh z0.b, z1.b, z31.b
+sqrdmlsh z0.d, z1.d, z15.d[1]
+sqrdmlsh z0.d, z1.d, z31.d
+sqrdmlsh z0.h, z1.h, z31.h
+sqrdmlsh z0.h, z1.h, z7.h[7]
+sqrdmlsh z0.s, z1.s, z31.s
+sqrdmlsh z0.s, z1.s, z7.s[3]
+sqrdmulh z0.b, z1.b, z2.b
+sqrdmulh z0.d, z1.d, z15.d[1]
+sqrdmulh z0.h, z1.h, z2.h
+sqrdmulh z0.h, z1.h, z7.h[7]
+sqrdmulh z0.s, z1.s, z7.s[3]
+sqrdmulh z29.s, z30.s, z31.s
+sqrdmulh z31.d, z31.d, z31.d
+sqrshl z0.b, p0/m, z0.b, z1.b
+sqrshl z0.h, p0/m, z0.h, z1.h
+sqrshl z29.s, p7/m, z29.s, z30.s
+sqrshl z31.d, p7/m, z31.d, z30.d
+sqrshlr z0.b, p0/m, z0.b, z1.b
+sqrshlr z0.h, p0/m, z0.h, z1.h
+sqrshlr z29.s, p7/m, z29.s, z30.s
+sqrshlr z31.d, p7/m, z31.d, z30.d
+sqrshrnb z0.b, z0.h, #1
+sqrshrnb z0.h, z0.s, #1
+sqrshrnb z0.s, z0.d, #1
+sqrshrnb z31.b, z31.h, #8
+sqrshrnb z31.h, z31.s, #16
+sqrshrnb z31.s, z31.d, #32
+sqrshrnt z0.b, z0.h, #1
+sqrshrnt z0.h, z0.s, #1
+sqrshrnt z0.s, z0.d, #1
+sqrshrnt z31.b, z31.h, #8
+sqrshrnt z31.h, z31.s, #16
+sqrshrnt z31.s, z31.d, #32
+sqrshrunb z0.b, z0.h, #1
+sqrshrunb z0.h, z0.s, #1
+sqrshrunb z0.s, z0.d, #1
+sqrshrunb z31.b, z31.h, #8
+sqrshrunb z31.h, z31.s, #16
+sqrshrunb z31.s, z31.d, #32
+sqrshrunt z0.b, z0.h, #1
+sqrshrunt z0.h, z0.s, #1
+sqrshrunt z0.s, z0.d, #1
+sqrshrunt z31.b, z31.h, #8
+sqrshrunt z31.h, z31.s, #16
+sqrshrunt z31.s, z31.d, #32
+sqshl z0.b, p0/m, z0.b, #0
+sqshl z0.b, p0/m, z0.b, z1.b
+sqshl z0.d, p0/m, z0.d, #0
+sqshl z0.h, p0/m, z0.h, #0
+sqshl z0.h, p0/m, z0.h, z1.h
+sqshl z0.s, p0/m, z0.s, #0
+sqshl z29.s, p7/m, z29.s, z30.s
+sqshl z31.b, p0/m, z31.b, #7
+sqshl z31.d, p0/m, z31.d, #63
+sqshl z31.d, p7/m, z31.d, z30.d
+sqshl z31.h, p0/m, z31.h, #15
+sqshl z31.s, p0/m, z31.s, #31
+sqshlr z0.b, p0/m, z0.b, z1.b
+sqshlr z0.h, p0/m, z0.h, z1.h
+sqshlr z29.s, p7/m, z29.s, z30.s
+sqshlr z31.d, p7/m, z31.d, z30.d
+sqshlu z0.b, p0/m, z0.b, #0
+sqshlu z0.d, p0/m, z0.d, #0
+sqshlu z0.h, p0/m, z0.h, #0
+sqshlu z0.s, p0/m, z0.s, #0
+sqshlu z31.b, p0/m, z31.b, #7
+sqshlu z31.d, p0/m, z31.d, #63
+sqshlu z31.h, p0/m, z31.h, #15
+sqshlu z31.s, p0/m, z31.s, #31
+sqshrnb z0.b, z0.h, #1
+sqshrnb z0.h, z0.s, #1
+sqshrnb z0.s, z0.d, #1
+sqshrnb z31.b, z31.h, #8
+sqshrnb z31.h, z31.s, #16
+sqshrnb z31.s, z31.d, #32
+sqshrnt z0.b, z0.h, #1
+sqshrnt z0.h, z0.s, #1
+sqshrnt z0.s, z0.d, #1
+sqshrnt z31.b, z31.h, #8
+sqshrnt z31.h, z31.s, #16
+sqshrnt z31.s, z31.d, #32
+sqshrunb z0.b, z0.h, #1
+sqshrunb z0.h, z0.s, #1
+sqshrunb z0.s, z0.d, #1
+sqshrunb z31.b, z31.h, #8
+sqshrunb z31.h, z31.s, #16
+sqshrunb z31.s, z31.d, #32
+sqshrunt z0.b, z0.h, #1
+sqshrunt z0.h, z0.s, #1
+sqshrunt z0.s, z0.d, #1
+sqshrunt z31.b, z31.h, #8
+sqshrunt z31.h, z31.s, #16
+sqshrunt z31.s, z31.d, #32
+sqsub z0.b, p0/m, z0.b, z1.b
+sqsub z0.b, z0.b, #0
+sqsub z0.b, z0.b, z0.b
+sqsub z0.d, z0.d, #0
+sqsub z0.d, z0.d, #0, lsl #8
+sqsub z0.d, z0.d, z0.d
+sqsub z0.h, p0/m, z0.h, z1.h
+sqsub z0.h, z0.h, #0
+sqsub z0.h, z0.h, #0, lsl #8
+sqsub z0.h, z0.h, z0.h
+sqsub z0.s, z0.s, #0
+sqsub z0.s, z0.s, #0, lsl #8
+sqsub z0.s, z0.s, z0.s
+sqsub z29.s, p7/m, z29.s, z30.s
+sqsub z31.b, z31.b, #255
+sqsub z31.d, p7/m, z31.d, z30.d
+sqsub z31.d, z31.d, #65280
+sqsub z31.h, z31.h, #65280
+sqsub z31.s, z31.s, #65280
+sqsubr z0.b, p0/m, z0.b, z1.b
+sqsubr z0.h, p0/m, z0.h, z1.h
+sqsubr z29.s, p7/m, z29.s, z30.s
+sqsubr z31.d, p7/m, z31.d, z30.d
+sqxtnb z0.b, z31.h
+sqxtnb z0.h, z31.s
+sqxtnb z0.s, z31.d
+sqxtnt z0.b, z31.h
+sqxtnt z0.h, z31.s
+sqxtnt z0.s, z31.d
+sqxtunb z0.b, z31.h
+sqxtunb z0.h, z31.s
+sqxtunb z0.s, z31.d
+sqxtunt z0.b, z31.h
+sqxtunt z0.h, z31.s
+sqxtunt z0.s, z31.d
+srhadd z0.b, p0/m, z0.b, z1.b
+srhadd z0.h, p0/m, z0.h, z1.h
+srhadd z29.s, p7/m, z29.s, z30.s
+srhadd z31.d, p7/m, z31.d, z30.d
+sri z0.b, z0.b, #1
+sri z0.d, z0.d, #1
+sri z0.h, z0.h, #1
+sri z0.s, z0.s, #1
+sri z31.b, z31.b, #8
+sri z31.d, z31.d, #64
+sri z31.h, z31.h, #16
+sri z31.s, z31.s, #32
+srshl z0.b, p0/m, z0.b, z1.b
+srshl z0.h, p0/m, z0.h, z1.h
+srshl z29.s, p7/m, z29.s, z30.s
+srshl z31.d, p7/m, z31.d, z30.d
+srshlr z0.b, p0/m, z0.b, z1.b
+srshlr z0.h, p0/m, z0.h, z1.h
+srshlr z29.s, p7/m, z29.s, z30.s
+srshlr z31.d, p7/m, z31.d, z30.d
+srshr z0.b, p0/m, z0.b, #1
+srshr z0.d, p0/m, z0.d, #1
+srshr z0.h, p0/m, z0.h, #1
+srshr z0.s, p0/m, z0.s, #1
+srshr z31.b, p0/m, z31.b, #8
+srshr z31.d, p0/m, z31.d, #64
+srshr z31.h, p0/m, z31.h, #16
+srshr z31.s, p0/m, z31.s, #32
+srsra z0.b, z0.b, #1
+srsra z0.d, z0.d, #1
+srsra z0.h, z0.h, #1
+srsra z0.s, z0.s, #1
+srsra z31.b, z31.b, #8
+srsra z31.d, z31.d, #64
+srsra z31.h, z31.h, #16
+srsra z31.s, z31.s, #32
+sshllb z0.d, z0.s, #0
+sshllb z0.h, z0.b, #0
+sshllb z0.s, z0.h, #0
+sshllb z31.d, z31.s, #31
+sshllb z31.h, z31.b, #7
+sshllb z31.s, z31.h, #15
+sshllt z0.d, z0.s, #0
+sshllt z0.h, z0.b, #0
+sshllt z0.s, z0.h, #0
+sshllt z31.d, z31.s, #31
+sshllt z31.h, z31.b, #7
+sshllt z31.s, z31.h, #15
+ssra z0.b, z0.b, #1
+ssra z0.d, z0.d, #1
+ssra z0.h, z0.h, #1
+ssra z0.s, z0.s, #1
+ssra z31.b, z31.b, #8
+ssra z31.d, z31.d, #64
+ssra z31.h, z31.h, #16
+ssra z31.s, z31.s, #32
+ssublb z0.h, z1.b, z2.b
+ssublb z29.s, z30.h, z31.h
+ssublb z31.d, z31.s, z31.s
+ssublbt z0.d, z1.s, z31.s
+ssublbt z0.h, z1.b, z31.b
+ssublbt z0.s, z1.h, z31.h
+ssublt z0.h, z1.b, z2.b
+ssublt z29.s, z30.h, z31.h
+ssublt z31.d, z31.s, z31.s
+ssubltb z0.d, z1.s, z31.s
+ssubltb z0.h, z1.b, z31.b
+ssubltb z0.s, z1.h, z31.h
+ssubwb z0.h, z1.h, z2.b
+ssubwb z29.s, z30.s, z31.h
+ssubwb z31.d, z31.d, z31.s
+ssubwt z0.h, z1.h, z2.b
+ssubwt z29.s, z30.s, z31.h
+ssubwt z31.d, z31.d, z31.s
+st1b { z0.b }, p0, [x0, x0]
+st1b { z0.b }, p0, [x0]
+st1b { z0.d }, p0, [x0, x0]
+st1b { z0.d }, p0, [x0, z0.d, sxtw]
+st1b { z0.d }, p0, [x0, z0.d, uxtw]
+st1b { z0.d }, p0, [x0, z0.d]
+st1b { z0.d }, p0, [x0]
+st1b { z0.d }, p7, [z0.d]
+st1b { z0.h }, p0, [x0, x0]
+st1b { z0.h }, p0, [x0]
+st1b { z0.s }, p0, [x0, x0]
+st1b { z0.s }, p0, [x0, z0.s, sxtw]
+st1b { z0.s }, p0, [x0, z0.s, uxtw]
+st1b { z0.s }, p0, [x0]
+st1b { z0.s }, p7, [z0.s]
+st1b { z21.b }, p5, [x10, #5, mul vl]
+st1b { z21.d }, p5, [x10, #5, mul vl]
+st1b { z21.h }, p5, [x10, #5, mul vl]
+st1b { z21.s }, p5, [x10, #5, mul vl]
+st1b { z31.b }, p7, [sp, #-1, mul vl]
+st1b { z31.d }, p7, [sp, #-1, mul vl]
+st1b { z31.d }, p7, [z31.d, #31]
+st1b { z31.h }, p7, [sp, #-1, mul vl]
+st1b { z31.s }, p7, [sp, #-1, mul vl]
+st1b { z31.s }, p7, [z31.s, #31]
+st1d { z0.d }, p0, [x0, x0, lsl #3]
+st1d { z0.d }, p0, [x0, z0.d, lsl #3]
+st1d { z0.d }, p0, [x0, z0.d, sxtw #3]
+st1d { z0.d }, p0, [x0, z0.d, sxtw]
+st1d { z0.d }, p0, [x0, z0.d, uxtw #3]
+st1d { z0.d }, p0, [x0, z0.d, uxtw]
+st1d { z0.d }, p0, [x0, z0.d]
+st1d { z0.d }, p0, [x0]
+st1d { z0.d }, p7, [z0.d]
+st1d { z21.d }, p5, [x10, #5, mul vl]
+st1d { z31.d }, p7, [sp, #-1, mul vl]
+st1d { z31.d }, p7, [z31.d, #248]
+st1h { z0.d }, p0, [x0, x0, lsl #1]
+st1h { z0.d }, p0, [x0, z0.d, lsl #1]
+st1h { z0.d }, p0, [x0, z0.d, sxtw #1]
+st1h { z0.d }, p0, [x0, z0.d, sxtw]
+st1h { z0.d }, p0, [x0, z0.d, uxtw #1]
+st1h { z0.d }, p0, [x0, z0.d, uxtw]
+st1h { z0.d }, p0, [x0, z0.d]
+st1h { z0.d }, p0, [x0]
+st1h { z0.d }, p7, [z0.d]
+st1h { z0.h }, p0, [x0, x0, lsl #1]
+st1h { z0.h }, p0, [x0]
+st1h { z0.s }, p0, [x0, x0, lsl #1]
+st1h { z0.s }, p0, [x0, z0.s, sxtw #1]
+st1h { z0.s }, p0, [x0, z0.s, sxtw]
+st1h { z0.s }, p0, [x0, z0.s, uxtw #1]
+st1h { z0.s }, p0, [x0, z0.s, uxtw]
+st1h { z0.s }, p0, [x0]
+st1h { z0.s }, p7, [z0.s]
+st1h { z21.d }, p5, [x10, #5, mul vl]
+st1h { z21.h }, p5, [x10, #5, mul vl]
+st1h { z21.s }, p5, [x10, #5, mul vl]
+st1h { z31.d }, p7, [sp, #-1, mul vl]
+st1h { z31.d }, p7, [z31.d, #62]
+st1h { z31.h }, p7, [sp, #-1, mul vl]
+st1h { z31.s }, p7, [sp, #-1, mul vl]
+st1h { z31.s }, p7, [z31.s, #62]
+st1w { z0.d }, p0, [x0, x0, lsl #2]
+st1w { z0.d }, p0, [x0, z0.d, lsl #2]
+st1w { z0.d }, p0, [x0, z0.d, sxtw #2]
+st1w { z0.d }, p0, [x0, z0.d, sxtw]
+st1w { z0.d }, p0, [x0, z0.d, uxtw #2]
+st1w { z0.d }, p0, [x0, z0.d, uxtw]
+st1w { z0.d }, p0, [x0, z0.d]
+st1w { z0.d }, p0, [x0]
+st1w { z0.d }, p7, [z0.d]
+st1w { z0.s }, p0, [x0, x0, lsl #2]
+st1w { z0.s }, p0, [x0, z0.s, sxtw #2]
+st1w { z0.s }, p0, [x0, z0.s, sxtw]
+st1w { z0.s }, p0, [x0, z0.s, uxtw #2]
+st1w { z0.s }, p0, [x0, z0.s, uxtw]
+st1w { z0.s }, p0, [x0]
+st1w { z0.s }, p7, [z0.s]
+st1w { z21.d }, p5, [x10, #5, mul vl]
+st1w { z21.s }, p5, [x10, #5, mul vl]
+st1w { z31.d }, p7, [sp, #-1, mul vl]
+st1w { z31.d }, p7, [z31.d, #124]
+st1w { z31.s }, p7, [sp, #-1, mul vl]
+st1w { z31.s }, p7, [z31.s, #124]
+st2b { z0.b, z1.b }, p0, [x0, x0]
+st2b { z0.b, z1.b }, p0, [x0]
+st2b { z21.b, z22.b }, p5, [x10, #10, mul vl]
+st2b { z23.b, z24.b }, p3, [x13, #-16, mul vl]
+st2b { z5.b, z6.b }, p3, [x17, x16]
+st2d { z0.d, z1.d }, p0, [x0, x0, lsl #3]
+st2d { z0.d, z1.d }, p0, [x0]
+st2d { z21.d, z22.d }, p5, [x10, #10, mul vl]
+st2d { z23.d, z24.d }, p3, [x13, #-16, mul vl]
+st2d { z5.d, z6.d }, p3, [x17, x16, lsl #3]
+st2h { z0.h, z1.h }, p0, [x0, x0, lsl #1]
+st2h { z0.h, z1.h }, p0, [x0]
+st2h { z21.h, z22.h }, p5, [x10, #10, mul vl]
+st2h { z23.h, z24.h }, p3, [x13, #-16, mul vl]
+st2h { z5.h, z6.h }, p3, [x17, x16, lsl #1]
+st2w { z0.s, z1.s }, p0, [x0, x0, lsl #2]
+st2w { z0.s, z1.s }, p0, [x0]
+st2w { z21.s, z22.s }, p5, [x10, #10, mul vl]
+st2w { z23.s, z24.s }, p3, [x13, #-16, mul vl]
+st2w { z5.s, z6.s }, p3, [x17, x16, lsl #2]
+st3b { z0.b, z1.b, z2.b }, p0, [x0, x0]
+st3b { z0.b, z1.b, z2.b }, p0, [x0]
+st3b { z21.b, z22.b, z23.b }, p5, [x10, #15, mul vl]
+st3b { z23.b, z24.b, z25.b }, p3, [x13, #-24, mul vl]
+st3b { z5.b, z6.b, z7.b }, p3, [x17, x16]
+st3d { z0.d, z1.d, z2.d }, p0, [x0, x0, lsl #3]
+st3d { z0.d, z1.d, z2.d }, p0, [x0]
+st3d { z21.d, z22.d, z23.d }, p5, [x10, #15, mul vl]
+st3d { z23.d, z24.d, z25.d }, p3, [x13, #-24, mul vl]
+st3d { z5.d, z6.d, z7.d }, p3, [x17, x16, lsl #3]
+st3h { z0.h, z1.h, z2.h }, p0, [x0, x0, lsl #1]
+st3h { z0.h, z1.h, z2.h }, p0, [x0]
+st3h { z21.h, z22.h, z23.h }, p5, [x10, #15, mul vl]
+st3h { z23.h, z24.h, z25.h }, p3, [x13, #-24, mul vl]
+st3h { z5.h, z6.h, z7.h }, p3, [x17, x16, lsl #1]
+st3w { z0.s, z1.s, z2.s }, p0, [x0, x0, lsl #2]
+st3w { z0.s, z1.s, z2.s }, p0, [x0]
+st3w { z21.s, z22.s, z23.s }, p5, [x10, #15, mul vl]
+st3w { z23.s, z24.s, z25.s }, p3, [x13, #-24, mul vl]
+st3w { z5.s, z6.s, z7.s }, p3, [x17, x16, lsl #2]
+st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x0]
+st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0]
+st4b { z21.b, z22.b, z23.b, z24.b }, p5, [x10, #20, mul vl]
+st4b { z23.b, z24.b, z25.b, z26.b }, p3, [x13, #-32, mul vl]
+st4b { z5.b, z6.b, z7.b, z8.b }, p3, [x17, x16]
+st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, x0, lsl #3]
+st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0]
+st4d { z21.d, z22.d, z23.d, z24.d }, p5, [x10, #20, mul vl]
+st4d { z23.d, z24.d, z25.d, z26.d }, p3, [x13, #-32, mul vl]
+st4d { z5.d, z6.d, z7.d, z8.d }, p3, [x17, x16, lsl #3]
+st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, x0, lsl #1]
+st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0]
+st4h { z21.h, z22.h, z23.h, z24.h }, p5, [x10, #20, mul vl]
+st4h { z23.h, z24.h, z25.h, z26.h }, p3, [x13, #-32, mul vl]
+st4h { z5.h, z6.h, z7.h, z8.h }, p3, [x17, x16, lsl #1]
+st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, x0, lsl #2]
+st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0]
+st4w { z21.s, z22.s, z23.s, z24.s }, p5, [x10, #20, mul vl]
+st4w { z23.s, z24.s, z25.s, z26.s }, p3, [x13, #-32, mul vl]
+st4w { z5.s, z6.s, z7.s, z8.s }, p3, [x17, x16, lsl #2]
+stnt1b { z0.b }, p0, [x0, x0]
+stnt1b { z0.b }, p0, [x0]
+stnt1b { z0.d }, p0, [z1.d]
+stnt1b { z0.s }, p0, [z1.s]
+stnt1b { z21.b }, p5, [x10, #7, mul vl]
+stnt1b { z23.b }, p3, [x13, #-8, mul vl]
+stnt1b { z31.d }, p7, [z31.d, x0]
+stnt1b { z31.d }, p7, [z31.d]
+stnt1b { z31.s }, p7, [z31.s, x0]
+stnt1b { z31.s }, p7, [z31.s]
+stnt1d { z0.d }, p0, [x0, x0, lsl #3]
+stnt1d { z0.d }, p0, [x0]
+stnt1d { z0.d }, p0, [z1.d]
+stnt1d { z21.d }, p5, [x10, #7, mul vl]
+stnt1d { z23.d }, p3, [x13, #-8, mul vl]
+stnt1d { z31.d }, p7, [z31.d, x0]
+stnt1d { z31.d }, p7, [z31.d]
+stnt1h { z0.d }, p0, [z1.d]
+stnt1h { z0.h }, p0, [x0, x0, lsl #1]
+stnt1h { z0.h }, p0, [x0]
+stnt1h { z0.s }, p0, [z1.s]
+stnt1h { z21.h }, p5, [x10, #7, mul vl]
+stnt1h { z23.h }, p3, [x13, #-8, mul vl]
+stnt1h { z31.d }, p7, [z31.d, x0]
+stnt1h { z31.d }, p7, [z31.d]
+stnt1h { z31.s }, p7, [z31.s, x0]
+stnt1h { z31.s }, p7, [z31.s]
+stnt1w { z0.d }, p0, [z1.d]
+stnt1w { z0.s }, p0, [x0, x0, lsl #2]
+stnt1w { z0.s }, p0, [x0]
+stnt1w { z0.s }, p0, [z1.s]
+stnt1w { z21.s }, p5, [x10, #7, mul vl]
+stnt1w { z23.s }, p3, [x13, #-8, mul vl]
+stnt1w { z31.d }, p7, [z31.d, x0]
+stnt1w { z31.d }, p7, [z31.d]
+stnt1w { z31.s }, p7, [z31.s, x0]
+stnt1w { z31.s }, p7, [z31.s]
+str p0, [x0]
+str p15, [sp, #-256, mul vl]
+str p5, [x10, #255, mul vl]
+str z0, [x0]
+str z21, [x10, #-256, mul vl]
+str z31, [sp, #255, mul vl]
+sub z0.b, p0/m, z0.b, z0.b
+sub z0.b, z0.b, #0
+sub z0.b, z0.b, z0.b
+sub z0.d, p0/m, z0.d, z0.d
+sub z0.d, z0.d, #0
+sub z0.d, z0.d, #0, lsl #8
+sub z0.d, z0.d, z0.d
+sub z0.h, p0/m, z0.h, z0.h
+sub z0.h, z0.h, #0
+sub z0.h, z0.h, #0, lsl #8
+sub z0.h, z0.h, z0.h
+sub z0.s, p0/m, z0.s, z0.s
+sub z0.s, z0.s, #0
+sub z0.s, z0.s, #0, lsl #8
+sub z0.s, z0.s, z0.s
+sub z21.b, p5/m, z21.b, z10.b
+sub z21.b, z10.b, z21.b
+sub z21.d, p5/m, z21.d, z10.d
+sub z21.d, z10.d, z21.d
+sub z21.h, p5/m, z21.h, z10.h
+sub z21.h, z10.h, z21.h
+sub z21.s, p5/m, z21.s, z10.s
+sub z21.s, z10.s, z21.s
+sub z23.b, p3/m, z23.b, z13.b
+sub z23.b, z13.b, z8.b
+sub z23.d, p3/m, z23.d, z13.d
+sub z23.d, z13.d, z8.d
+sub z23.h, p3/m, z23.h, z13.h
+sub z23.h, z13.h, z8.h
+sub z23.s, p3/m, z23.s, z13.s
+sub z23.s, z13.s, z8.s
+sub z31.b, p7/m, z31.b, z31.b
+sub z31.b, z31.b, #255
+sub z31.b, z31.b, z31.b
+sub z31.d, p7/m, z31.d, z31.d
+sub z31.d, z31.d, #65280
+sub z31.d, z31.d, z31.d
+sub z31.h, p7/m, z31.h, z31.h
+sub z31.h, z31.h, #65280
+sub z31.h, z31.h, z31.h
+sub z31.s, p7/m, z31.s, z31.s
+sub z31.s, z31.s, #65280
+sub z31.s, z31.s, z31.s
+subhnb z0.b, z1.h, z31.h
+subhnb z0.h, z1.s, z31.s
+subhnb z0.s, z1.d, z31.d
+subhnt z0.b, z1.h, z31.h
+subhnt z0.h, z1.s, z31.s
+subhnt z0.s, z1.d, z31.d
+subr z0.b, p0/m, z0.b, z0.b
+subr z0.b, z0.b, #0
+subr z0.d, p0/m, z0.d, z0.d
+subr z0.d, z0.d, #0
+subr z0.d, z0.d, #0, lsl #8
+subr z0.h, p0/m, z0.h, z0.h
+subr z0.h, z0.h, #0
+subr z0.h, z0.h, #0, lsl #8
+subr z0.s, p0/m, z0.s, z0.s
+subr z0.s, z0.s, #0
+subr z0.s, z0.s, #0, lsl #8
+subr z31.b, z31.b, #255
+subr z31.d, z31.d, #65280
+subr z31.h, z31.h, #65280
+subr z31.s, z31.s, #65280
+sunpkhi z31.d, z31.s
+sunpkhi z31.h, z31.b
+sunpkhi z31.s, z31.h
+sunpklo z31.d, z31.s
+sunpklo z31.h, z31.b
+sunpklo z31.s, z31.h
+suqadd z0.b, p0/m, z0.b, z1.b
+suqadd z0.h, p0/m, z0.h, z1.h
+suqadd z29.s, p7/m, z29.s, z30.s
+suqadd z31.d, p7/m, z31.d, z30.d
+sxtb z0.d, p0/m, z0.d
+sxtb z0.h, p0/m, z0.h
+sxtb z0.s, p0/m, z0.s
+sxtb z31.d, p7/m, z31.d
+sxtb z31.h, p7/m, z31.h
+sxtb z31.s, p7/m, z31.s
+sxth z0.d, p0/m, z0.d
+sxth z0.s, p0/m, z0.s
+sxth z31.d, p7/m, z31.d
+sxth z31.s, p7/m, z31.s
+sxtw z0.d, p0/m, z0.d
+sxtw z31.d, p7/m, z31.d
+tbl z28.b, { z29.b, z30.b }, z31.b
+tbl z28.d, { z29.d, z30.d }, z31.d
+tbl z28.h, { z29.h, z30.h }, z31.h
+tbl z28.s, { z29.s, z30.s }, z31.s
+tbl z31.b, { z31.b }, z31.b
+tbl z31.d, { z31.d }, z31.d
+tbl z31.h, { z31.h }, z31.h
+tbl z31.s, { z31.s }, z31.s
+tbx z31.b, z31.b, z31.b
+tbx z31.d, z31.d, z31.d
+tbx z31.h, z31.h, z31.h
+tbx z31.s, z31.s, z31.s
+trn1 p15.b, p15.b, p15.b
+trn1 p15.d, p15.d, p15.d
+trn1 p15.h, p15.h, p15.h
+trn1 p15.s, p15.s, p15.s
+trn1 z31.b, z31.b, z31.b
+trn1 z31.d, z31.d, z31.d
+trn1 z31.h, z31.h, z31.h
+trn1 z31.s, z31.s, z31.s
+trn2 p15.b, p15.b, p15.b
+trn2 p15.d, p15.d, p15.d
+trn2 p15.h, p15.h, p15.h
+trn2 p15.s, p15.s, p15.s
+trn2 z31.b, z31.b, z31.b
+trn2 z31.d, z31.d, z31.d
+trn2 z31.h, z31.h, z31.h
+trn2 z31.s, z31.s, z31.s
+uaba z0.b, z1.b, z31.b
+uaba z0.d, z1.d, z31.d
+uaba z0.h, z1.h, z31.h
+uaba z0.s, z1.s, z31.s
+uabalb z0.d, z1.s, z31.s
+uabalb z0.h, z1.b, z31.b
+uabalb z0.s, z1.h, z31.h
+uabalt z0.d, z1.s, z31.s
+uabalt z0.h, z1.b, z31.b
+uabalt z0.s, z1.h, z31.h
+uabd z31.b, p7/m, z31.b, z31.b
+uabd z31.d, p7/m, z31.d, z31.d
+uabd z31.h, p7/m, z31.h, z31.h
+uabd z31.s, p7/m, z31.s, z31.s
+uabdlb z0.h, z1.b, z2.b
+uabdlb z29.s, z30.h, z31.h
+uabdlb z31.d, z31.s, z31.s
+uabdlt z0.h, z1.b, z2.b
+uabdlt z29.s, z30.h, z31.h
+uabdlt z31.d, z31.s, z31.s
+uadalp z0.h, p0/m, z1.b
+uadalp z29.s, p0/m, z30.h
+uadalp z30.d, p7/m, z31.s
+uaddlb z0.h, z1.b, z2.b
+uaddlb z29.s, z30.h, z31.h
+uaddlb z31.d, z31.s, z31.s
+uaddlt z0.h, z1.b, z2.b
+uaddlt z29.s, z30.h, z31.h
+uaddlt z31.d, z31.s, z31.s
+uaddv d0, p7, z31.b
+uaddv d0, p7, z31.d
+uaddv d0, p7, z31.h
+uaddv d0, p7, z31.s
+uaddwb z0.h, z1.h, z2.b
+uaddwb z29.s, z30.s, z31.h
+uaddwb z31.d, z31.d, z31.s
+uaddwt z0.h, z1.h, z2.b
+uaddwt z29.s, z30.s, z31.h
+uaddwt z31.d, z31.d, z31.s
+ucvtf z0.d, p0/m, z0.d
+ucvtf z0.d, p0/m, z0.s
+ucvtf z0.h, p0/m, z0.d
+ucvtf z0.h, p0/m, z0.h
+ucvtf z0.h, p0/m, z0.s
+ucvtf z0.s, p0/m, z0.d
+ucvtf z0.s, p0/m, z0.s
+udiv z0.d, p7/m, z0.d, z31.d
+udiv z0.s, p7/m, z0.s, z31.s
+udivr z0.d, p7/m, z0.d, z31.d
+udivr z0.s, p7/m, z0.s, z31.s
+udot z0.d, z1.h, z15.h[1]
+udot z0.d, z1.h, z31.h
+udot z0.s, z1.b, z31.b
+udot z0.s, z1.b, z7.b[3]
+uhadd z0.b, p0/m, z0.b, z1.b
+uhadd z0.h, p0/m, z0.h, z1.h
+uhadd z29.s, p7/m, z29.s, z30.s
+uhadd z31.d, p7/m, z31.d, z30.d
+uhsub z0.b, p0/m, z0.b, z1.b
+uhsub z0.h, p0/m, z0.h, z1.h
+uhsub z29.s, p7/m, z29.s, z30.s
+uhsub z31.d, p7/m, z31.d, z30.d
+uhsubr z0.b, p0/m, z0.b, z1.b
+uhsubr z0.h, p0/m, z0.h, z1.h
+uhsubr z29.s, p7/m, z29.s, z30.s
+uhsubr z31.d, p7/m, z31.d, z30.d
+umax z0.b, z0.b, #0
+umax z31.b, p7/m, z31.b, z31.b
+umax z31.b, z31.b, #255
+umax z31.d, p7/m, z31.d, z31.d
+umax z31.h, p7/m, z31.h, z31.h
+umax z31.s, p7/m, z31.s, z31.s
+umaxp z0.b, p0/m, z0.b, z1.b
+umaxp z0.h, p0/m, z0.h, z1.h
+umaxp z29.s, p7/m, z29.s, z30.s
+umaxp z31.d, p7/m, z31.d, z30.d
+umaxv b0, p7, z31.b
+umaxv d0, p7, z31.d
+umaxv h0, p7, z31.h
+umaxv s0, p7, z31.s
+umin z0.b, z0.b, #0
+umin z31.b, p7/m, z31.b, z31.b
+umin z31.b, z31.b, #255
+umin z31.d, p7/m, z31.d, z31.d
+umin z31.h, p7/m, z31.h, z31.h
+umin z31.s, p7/m, z31.s, z31.s
+uminp z0.b, p0/m, z0.b, z1.b
+uminp z0.h, p0/m, z0.h, z1.h
+uminp z29.s, p7/m, z29.s, z30.s
+uminp z31.d, p7/m, z31.d, z30.d
+uminv b0, p7, z31.b
+uminv d0, p7, z31.d
+uminv h0, p7, z31.h
+uminv s0, p7, z31.s
+umlalb z0.d, z1.s, z15.s[1]
+umlalb z0.d, z1.s, z31.s
+umlalb z0.h, z1.b, z31.b
+umlalb z0.s, z1.h, z31.h
+umlalb z0.s, z1.h, z7.h[7]
+umlalt z0.d, z1.s, z15.s[1]
+umlalt z0.d, z1.s, z31.s
+umlalt z0.h, z1.b, z31.b
+umlalt z0.s, z1.h, z31.h
+umlalt z0.s, z1.h, z7.h[7]
+umlslb z0.d, z1.s, z15.s[1]
+umlslb z0.d, z1.s, z31.s
+umlslb z0.h, z1.b, z31.b
+umlslb z0.s, z1.h, z31.h
+umlslb z0.s, z1.h, z7.h[7]
+umlslt z0.d, z1.s, z15.s[1]
+umlslt z0.d, z1.s, z31.s
+umlslt z0.h, z1.b, z31.b
+umlslt z0.s, z1.h, z31.h
+umlslt z0.s, z1.h, z7.h[7]
+ummla z0.s, z1.b, z2.b
+umulh z0.b, p7/m, z0.b, z31.b
+umulh z0.b, z1.b, z2.b
+umulh z0.d, p7/m, z0.d, z31.d
+umulh z0.h, p7/m, z0.h, z31.h
+umulh z0.h, z1.h, z2.h
+umulh z0.s, p7/m, z0.s, z31.s
+umulh z29.s, z30.s, z31.s
+umulh z31.d, z31.d, z31.d
+umullb z0.d, z1.s, z15.s[1]
+umullb z0.h, z1.b, z2.b
+umullb z0.s, z1.h, z7.h[7]
+umullb z29.s, z30.h, z31.h
+umullb z31.d, z31.s, z31.s
+umullt z0.d, z1.s, z15.s[1]
+umullt z0.h, z1.b, z2.b
+umullt z0.s, z1.h, z7.h[7]
+umullt z29.s, z30.h, z31.h
+umullt z31.d, z31.s, z31.s
+uqadd z0.b, p0/m, z0.b, z1.b
+uqadd z0.b, z0.b, #0
+uqadd z0.b, z0.b, z0.b
+uqadd z0.d, z0.d, #0
+uqadd z0.d, z0.d, #0, lsl #8
+uqadd z0.d, z0.d, z0.d
+uqadd z0.h, p0/m, z0.h, z1.h
+uqadd z0.h, z0.h, #0
+uqadd z0.h, z0.h, #0, lsl #8
+uqadd z0.h, z0.h, z0.h
+uqadd z0.s, z0.s, #0
+uqadd z0.s, z0.s, #0, lsl #8
+uqadd z0.s, z0.s, z0.s
+uqadd z29.s, p7/m, z29.s, z30.s
+uqadd z31.b, z31.b, #255
+uqadd z31.d, p7/m, z31.d, z30.d
+uqadd z31.d, z31.d, #65280
+uqadd z31.h, z31.h, #65280
+uqadd z31.s, z31.s, #65280
+uqdecb w0
+uqdecb w0, all, mul #16
+uqdecb w0, pow2
+uqdecb w0, pow2, mul #16
+uqdecb x0
+uqdecb x0, #14
+uqdecb x0, all, mul #16
+uqdecb x0, pow2
+uqdecb x0, vl1
+uqdecd w0
+uqdecd w0, all, mul #16
+uqdecd w0, pow2
+uqdecd w0, pow2, mul #16
+uqdecd x0
+uqdecd x0, #14
+uqdecd x0, all, mul #16
+uqdecd x0, pow2
+uqdecd x0, vl1
+uqdecd z0.d
+uqdecd z0.d, all, mul #16
+uqdecd z0.d, pow2
+uqdecd z0.d, pow2, mul #16
+uqdech w0
+uqdech w0, all, mul #16
+uqdech w0, pow2
+uqdech w0, pow2, mul #16
+uqdech x0
+uqdech x0, #14
+uqdech x0, all, mul #16
+uqdech x0, pow2
+uqdech x0, vl1
+uqdech z0.h
+uqdech z0.h, all, mul #16
+uqdech z0.h, pow2
+uqdech z0.h, pow2, mul #16
+uqdecp wzr, p15.b
+uqdecp wzr, p15.d
+uqdecp wzr, p15.h
+uqdecp wzr, p15.s
+uqdecp x0, p0.b
+uqdecp x0, p0.d
+uqdecp x0, p0.h
+uqdecp x0, p0.s
+uqdecp z0.d, p0.d
+uqdecp z0.h, p0.h
+uqdecp z0.s, p0.s
+uqdecw w0
+uqdecw w0, all, mul #16
+uqdecw w0, pow2
+uqdecw w0, pow2, mul #16
+uqdecw x0
+uqdecw x0, #14
+uqdecw x0, all, mul #16
+uqdecw x0, pow2
+uqdecw x0, vl1
+uqdecw z0.s
+uqdecw z0.s, all, mul #16
+uqdecw z0.s, pow2
+uqdecw z0.s, pow2, mul #16
+uqincb w0
+uqincb w0, all, mul #16
+uqincb w0, pow2
+uqincb w0, pow2, mul #16
+uqincb x0
+uqincb x0, #14
+uqincb x0, all, mul #16
+uqincb x0, pow2
+uqincb x0, vl1
+uqincd w0
+uqincd w0, all, mul #16
+uqincd w0, pow2
+uqincd w0, pow2, mul #16
+uqincd x0
+uqincd x0, #14
+uqincd x0, all, mul #16
+uqincd x0, pow2
+uqincd x0, vl1
+uqincd z0.d
+uqincd z0.d, all, mul #16
+uqincd z0.d, pow2
+uqincd z0.d, pow2, mul #16
+uqinch w0
+uqinch w0, all, mul #16
+uqinch w0, pow2
+uqinch w0, pow2, mul #16
+uqinch x0
+uqinch x0, #14
+uqinch x0, all, mul #16
+uqinch x0, pow2
+uqinch x0, vl1
+uqinch z0.h
+uqinch z0.h, all, mul #16
+uqinch z0.h, pow2
+uqinch z0.h, pow2, mul #16
+uqincp wzr, p15.b
+uqincp wzr, p15.d
+uqincp wzr, p15.h
+uqincp wzr, p15.s
+uqincp x0, p0.b
+uqincp x0, p0.d
+uqincp x0, p0.h
+uqincp x0, p0.s
+uqincp z0.d, p0.d
+uqincp z0.h, p0.h
+uqincp z0.s, p0.s
+uqincw w0
+uqincw w0, all, mul #16
+uqincw w0, pow2
+uqincw w0, pow2, mul #16
+uqincw x0
+uqincw x0, #14
+uqincw x0, all, mul #16
+uqincw x0, pow2
+uqincw x0, vl1
+uqincw z0.s
+uqincw z0.s, all, mul #16
+uqincw z0.s, pow2
+uqincw z0.s, pow2, mul #16
+uqrshl z0.b, p0/m, z0.b, z1.b
+uqrshl z0.h, p0/m, z0.h, z1.h
+uqrshl z29.s, p7/m, z29.s, z30.s
+uqrshl z31.d, p7/m, z31.d, z30.d
+uqrshlr z0.b, p0/m, z0.b, z1.b
+uqrshlr z0.h, p0/m, z0.h, z1.h
+uqrshlr z29.s, p7/m, z29.s, z30.s
+uqrshlr z31.d, p7/m, z31.d, z30.d
+uqrshrnb z0.b, z0.h, #1
+uqrshrnb z0.h, z0.s, #1
+uqrshrnb z0.s, z0.d, #1
+uqrshrnb z31.b, z31.h, #8
+uqrshrnb z31.h, z31.s, #16
+uqrshrnb z31.s, z31.d, #32
+uqrshrnt z0.b, z0.h, #1
+uqrshrnt z0.h, z0.s, #1
+uqrshrnt z0.s, z0.d, #1
+uqrshrnt z31.b, z31.h, #8
+uqrshrnt z31.h, z31.s, #16
+uqrshrnt z31.s, z31.d, #32
+uqshl z0.b, p0/m, z0.b, #0
+uqshl z0.b, p0/m, z0.b, z1.b
+uqshl z0.d, p0/m, z0.d, #0
+uqshl z0.h, p0/m, z0.h, #0
+uqshl z0.h, p0/m, z0.h, z1.h
+uqshl z0.s, p0/m, z0.s, #0
+uqshl z29.s, p7/m, z29.s, z30.s
+uqshl z31.b, p0/m, z31.b, #7
+uqshl z31.d, p0/m, z31.d, #63
+uqshl z31.d, p7/m, z31.d, z30.d
+uqshl z31.h, p0/m, z31.h, #15
+uqshl z31.s, p0/m, z31.s, #31
+uqshlr z0.b, p0/m, z0.b, z1.b
+uqshlr z0.h, p0/m, z0.h, z1.h
+uqshlr z29.s, p7/m, z29.s, z30.s
+uqshlr z31.d, p7/m, z31.d, z30.d
+uqshrnb z0.b, z0.h, #1
+uqshrnb z0.h, z0.s, #1
+uqshrnb z0.s, z0.d, #1
+uqshrnb z31.b, z31.h, #8
+uqshrnb z31.h, z31.s, #16
+uqshrnb z31.s, z31.d, #32
+uqshrnt z0.b, z0.h, #1
+uqshrnt z0.h, z0.s, #1
+uqshrnt z0.s, z0.d, #1
+uqshrnt z31.b, z31.h, #8
+uqshrnt z31.h, z31.s, #16
+uqshrnt z31.s, z31.d, #32
+uqsub z0.b, p0/m, z0.b, z1.b
+uqsub z0.b, z0.b, #0
+uqsub z0.b, z0.b, z0.b
+uqsub z0.d, z0.d, #0
+uqsub z0.d, z0.d, #0, lsl #8
+uqsub z0.d, z0.d, z0.d
+uqsub z0.h, p0/m, z0.h, z1.h
+uqsub z0.h, z0.h, #0
+uqsub z0.h, z0.h, #0, lsl #8
+uqsub z0.h, z0.h, z0.h
+uqsub z0.s, z0.s, #0
+uqsub z0.s, z0.s, #0, lsl #8
+uqsub z0.s, z0.s, z0.s
+uqsub z29.s, p7/m, z29.s, z30.s
+uqsub z31.b, z31.b, #255
+uqsub z31.d, p7/m, z31.d, z30.d
+uqsub z31.d, z31.d, #65280
+uqsub z31.h, z31.h, #65280
+uqsub z31.s, z31.s, #65280
+uqsubr z0.b, p0/m, z0.b, z1.b
+uqsubr z0.h, p0/m, z0.h, z1.h
+uqsubr z29.s, p7/m, z29.s, z30.s
+uqsubr z31.d, p7/m, z31.d, z30.d
+uqxtnb z0.b, z31.h
+uqxtnb z0.h, z31.s
+uqxtnb z0.s, z31.d
+uqxtnt z0.b, z31.h
+uqxtnt z0.h, z31.s
+uqxtnt z0.s, z31.d
+urecpe z31.s, p7/m, z31.s
+urhadd z0.b, p0/m, z0.b, z1.b
+urhadd z0.h, p0/m, z0.h, z1.h
+urhadd z29.s, p7/m, z29.s, z30.s
+urhadd z31.d, p7/m, z31.d, z30.d
+urshl z0.b, p0/m, z0.b, z1.b
+urshl z0.h, p0/m, z0.h, z1.h
+urshl z29.s, p7/m, z29.s, z30.s
+urshl z31.d, p7/m, z31.d, z30.d
+urshlr z0.b, p0/m, z0.b, z1.b
+urshlr z0.h, p0/m, z0.h, z1.h
+urshlr z29.s, p7/m, z29.s, z30.s
+urshlr z31.d, p7/m, z31.d, z30.d
+urshr z0.b, p0/m, z0.b, #1
+urshr z0.d, p0/m, z0.d, #1
+urshr z0.h, p0/m, z0.h, #1
+urshr z0.s, p0/m, z0.s, #1
+urshr z31.b, p0/m, z31.b, #8
+urshr z31.d, p0/m, z31.d, #64
+urshr z31.h, p0/m, z31.h, #16
+urshr z31.s, p0/m, z31.s, #32
+ursqrte z31.s, p7/m, z31.s
+ursra z0.b, z0.b, #1
+ursra z0.d, z0.d, #1
+ursra z0.h, z0.h, #1
+ursra z0.s, z0.s, #1
+ursra z31.b, z31.b, #8
+ursra z31.d, z31.d, #64
+ursra z31.h, z31.h, #16
+ursra z31.s, z31.s, #32
+ushllb z0.d, z0.s, #0
+ushllb z0.h, z0.b, #0
+ushllb z0.s, z0.h, #0
+ushllb z31.d, z31.s, #31
+ushllb z31.h, z31.b, #7
+ushllb z31.s, z31.h, #15
+ushllt z0.d, z0.s, #0
+ushllt z0.h, z0.b, #0
+ushllt z0.s, z0.h, #0
+ushllt z31.d, z31.s, #31
+ushllt z31.h, z31.b, #7
+ushllt z31.s, z31.h, #15
+usmmla z0.s, z1.b, z2.b
+usqadd z0.b, p0/m, z0.b, z1.b
+usqadd z0.h, p0/m, z0.h, z1.h
+usqadd z29.s, p7/m, z29.s, z30.s
+usqadd z31.d, p7/m, z31.d, z30.d
+usra z0.b, z0.b, #1
+usra z0.d, z0.d, #1
+usra z0.h, z0.h, #1
+usra z0.s, z0.s, #1
+usra z31.b, z31.b, #8
+usra z31.d, z31.d, #64
+usra z31.h, z31.h, #16
+usra z31.s, z31.s, #32
+usublb z0.h, z1.b, z2.b
+usublb z29.s, z30.h, z31.h
+usublb z31.d, z31.s, z31.s
+usublt z0.h, z1.b, z2.b
+usublt z29.s, z30.h, z31.h
+usublt z31.d, z31.s, z31.s
+usubwb z0.h, z1.h, z2.b
+usubwb z29.s, z30.s, z31.h
+usubwb z31.d, z31.d, z31.s
+usubwt z0.h, z1.h, z2.b
+usubwt z29.s, z30.s, z31.h
+usubwt z31.d, z31.d, z31.s
+uunpkhi z31.d, z31.s
+uunpkhi z31.h, z31.b
+uunpkhi z31.s, z31.h
+uunpklo z31.d, z31.s
+uunpklo z31.h, z31.b
+uunpklo z31.s, z31.h
+uxtb z0.d, p0/m, z0.d
+uxtb z0.h, p0/m, z0.h
+uxtb z0.s, p0/m, z0.s
+uxtb z31.d, p7/m, z31.d
+uxtb z31.h, p7/m, z31.h
+uxtb z31.s, p7/m, z31.s
+uxth z0.d, p0/m, z0.d
+uxth z0.s, p0/m, z0.s
+uxth z31.d, p7/m, z31.d
+uxth z31.s, p7/m, z31.s
+uxtw z0.d, p0/m, z0.d
+uxtw z31.d, p7/m, z31.d
+uzp1 p15.b, p15.b, p15.b
+uzp1 p15.d, p15.d, p15.d
+uzp1 p15.h, p15.h, p15.h
+uzp1 p15.s, p15.s, p15.s
+uzp1 z31.b, z31.b, z31.b
+uzp1 z31.d, z31.d, z31.d
+uzp1 z31.h, z31.h, z31.h
+uzp1 z31.s, z31.s, z31.s
+uzp2 p15.b, p15.b, p15.b
+uzp2 p15.d, p15.d, p15.d
+uzp2 p15.h, p15.h, p15.h
+uzp2 p15.s, p15.s, p15.s
+uzp2 z31.b, z31.b, z31.b
+uzp2 z31.d, z31.d, z31.d
+uzp2 z31.h, z31.h, z31.h
+uzp2 z31.s, z31.s, z31.s
+whilege p15.b, w0, wzr
+whilege p15.b, wzr, w0
+whilege p15.b, x0, xzr
+whilege p15.b, xzr, x0
+whilege p15.d, w0, wzr
+whilege p15.d, x0, xzr
+whilege p15.h, w0, wzr
+whilege p15.h, x0, xzr
+whilege p15.s, w0, wzr
+whilege p15.s, x0, xzr
+whilerw p15.b, x30, x30
+whilerw p15.d, x30, x30
+whilerw p15.h, x30, x30
+whilerw p15.s, x30, x30
+whilewr p15.b, x30, x30
+whilewr p15.d, x30, x30
+whilewr p15.h, x30, x30
+whilewr p15.s, x30, x30
+wrffr p0.b
+wrffr p15.b
+xar z0.b, z0.b, z1.b, #1
+xar z0.d, z0.d, z1.d, #1
+xar z0.h, z0.h, z1.h, #1
+xar z0.s, z0.s, z1.s, #1
+xar z31.b, z31.b, z30.b, #8
+xar z31.d, z31.d, z30.d, #64
+xar z31.h, z31.h, z30.h, #16
+xar z31.s, z31.s, z30.s, #32
+zip1 p0.b, p0.b, p0.b
+zip1 p0.d, p0.d, p0.d
+zip1 p0.h, p0.h, p0.h
+zip1 p0.s, p0.s, p0.s
+zip1 p15.b, p15.b, p15.b
+zip1 p15.d, p15.d, p15.d
+zip1 p15.h, p15.h, p15.h
+zip1 p15.s, p15.s, p15.s
+zip1 z0.b, z0.b, z0.b
+zip1 z0.d, z0.d, z0.d
+zip1 z0.h, z0.h, z0.h
+zip1 z0.s, z0.s, z0.s
+zip1 z31.b, z31.b, z31.b
+zip1 z31.d, z31.d, z31.d
+zip1 z31.h, z31.h, z31.h
+zip1 z31.s, z31.s, z31.s
+zip2 p0.b, p0.b, p0.b
+zip2 p0.d, p0.d, p0.d
+zip2 p0.h, p0.h, p0.h
+zip2 p0.s, p0.s, p0.s
+zip2 p15.b, p15.b, p15.b
+zip2 p15.d, p15.d, p15.d
+zip2 p15.h, p15.h, p15.h
+zip2 p15.s, p15.s, p15.s
+zip2 z0.b, z0.b, z0.b
+zip2 z0.d, z0.d, z0.d
+zip2 z0.h, z0.h, z0.h
+zip2 z0.s, z0.s, z0.s
+zip2 z31.b, z31.b, z31.b
+zip2 z31.d, z31.d, z31.d
+zip2 z31.h, z31.h, z31.h
+zip2 z31.s, z31.s, z31.s
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 2 0.25 abs z0.b, p0/m, z0.b
+# CHECK-NEXT: 1 2 0.25 abs z0.d, p0/m, z0.d
+# CHECK-NEXT: 1 2 0.25 abs z0.h, p0/m, z0.h
+# CHECK-NEXT: 1 2 0.25 abs z0.s, p0/m, z0.s
+# CHECK-NEXT: 1 2 0.25 abs z31.b, p7/m, z31.b
+# CHECK-NEXT: 1 2 0.25 abs z31.d, p7/m, z31.d
+# CHECK-NEXT: 1 2 0.25 abs z31.h, p7/m, z31.h
+# CHECK-NEXT: 1 2 0.25 abs z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 2 0.25 adclb z0.d, z1.d, z31.d
+# CHECK-NEXT: 1 2 0.25 adclb z0.s, z1.s, z31.s
+# CHECK-NEXT: 1 2 0.25 adclt z0.d, z1.d, z31.d
+# CHECK-NEXT: 1 2 0.25 adclt z0.s, z1.s, z31.s
+# CHECK-NEXT: 1 2 0.25 add z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: 1 2 0.25 add z0.b, z0.b, #0
+# CHECK-NEXT: 1 2 0.25 add z0.b, z0.b, z0.b
+# CHECK-NEXT: 1 2 0.25 add z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: 1 2 0.25 add z0.d, z0.d, #0
+# CHECK-NEXT: 1 2 0.25 add z0.d, z0.d, #0, lsl #8
+# CHECK-NEXT: 1 2 0.25 add z0.d, z0.d, z0.d
+# CHECK-NEXT: 1 2 0.25 add z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: 1 2 0.25 add z0.h, z0.h, #0
+# CHECK-NEXT: 1 2 0.25 add z0.h, z0.h, #0, lsl #8
+# CHECK-NEXT: 1 2 0.25 add z0.h, z0.h, z0.h
+# CHECK-NEXT: 1 2 0.25 add z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: 1 2 0.25 add z0.s, z0.s, #0
+# CHECK-NEXT: 1 2 0.25 add z0.s, z0.s, #0, lsl #8
+# CHECK-NEXT: 1 2 0.25 add z0.s, z0.s, z0.s
+# CHECK-NEXT: 1 2 0.25 add z0.s, z1.s, z2.s
+# CHECK-NEXT: 1 2 0.25 add z21.b, p5/m, z21.b, z10.b
+# CHECK-NEXT: 1 2 0.25 add z21.b, z10.b, z21.b
+# CHECK-NEXT: 1 2 0.25 add z21.d, p5/m, z21.d, z10.d
+# CHECK-NEXT: 1 2 0.25 add z21.d, z10.d, z21.d
+# CHECK-NEXT: 1 2 0.25 add z21.h, p5/m, z21.h, z10.h
+# CHECK-NEXT: 1 2 0.25 add z21.h, z10.h, z21.h
+# CHECK-NEXT: 1 2 0.25 add z21.s, p5/m, z21.s, z10.s
+# CHECK-NEXT: 1 2 0.25 add z21.s, z10.s, z21.s
+# CHECK-NEXT: 1 2 0.25 add z23.b, p3/m, z23.b, z13.b
+# CHECK-NEXT: 1 2 0.25 add z23.b, z13.b, z8.b
+# CHECK-NEXT: 1 2 0.25 add z23.d, p3/m, z23.d, z13.d
+# CHECK-NEXT: 1 2 0.25 add z23.d, z13.d, z8.d
+# CHECK-NEXT: 1 2 0.25 add z23.h, p3/m, z23.h, z13.h
+# CHECK-NEXT: 1 2 0.25 add z23.h, z13.h, z8.h
+# CHECK-NEXT: 1 2 0.25 add z23.s, p3/m, z23.s, z13.s
+# CHECK-NEXT: 1 2 0.25 add z23.s, z13.s, z8.s
+# CHECK-NEXT: 1 2 0.25 add z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: 1 2 0.25 add z31.b, z31.b, #255
+# CHECK-NEXT: 1 2 0.25 add z31.b, z31.b, z31.b
+# CHECK-NEXT: 1 2 0.25 add z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: 1 2 0.25 add z31.d, z31.d, #65280
+# CHECK-NEXT: 1 2 0.25 add z31.d, z31.d, z31.d
+# CHECK-NEXT: 1 2 0.25 add z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: 1 2 0.25 add z31.h, z31.h, #65280
+# CHECK-NEXT: 1 2 0.25 add z31.h, z31.h, z31.h
+# CHECK-NEXT: 1 2 0.25 add z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.25 add z31.s, z31.s, #65280
+# CHECK-NEXT: 1 2 0.25 add z31.s, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.25 addhnb z0.b, z1.h, z31.h
+# CHECK-NEXT: 1 2 0.25 addhnb z0.h, z1.s, z31.s
+# CHECK-NEXT: 1 2 0.25 addhnb z0.s, z1.d, z31.d
+# CHECK-NEXT: 1 2 0.25 addhnt z0.b, z1.h, z31.h
+# CHECK-NEXT: 1 2 0.25 addhnt z0.h, z1.s, z31.s
+# CHECK-NEXT: 1 2 0.25 addhnt z0.s, z1.d, z31.d
+# CHECK-NEXT: 1 2 0.25 addp z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 2 0.25 addp z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.25 addp z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.25 addp z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 2 0.50 addpl sp, sp, #31
+# CHECK-NEXT: 1 2 0.50 addpl x0, x0, #-32
+# CHECK-NEXT: 1 2 0.50 addpl x21, x21, #0
+# CHECK-NEXT: 1 2 0.50 addpl x23, x8, #-1
+# CHECK-NEXT: 1 2 0.50 addvl sp, sp, #31
+# CHECK-NEXT: 1 2 0.50 addvl x0, x0, #-32
+# CHECK-NEXT: 1 2 0.50 addvl x21, x21, #0
+# CHECK-NEXT: 1 2 0.50 addvl x23, x8, #-1
+# CHECK-NEXT: 1 2 0.25 adr z0.d, [z0.d, z0.d, lsl #1]
+# CHECK-NEXT: 1 2 0.25 adr z0.d, [z0.d, z0.d, lsl #2]
+# CHECK-NEXT: 1 2 0.25 adr z0.d, [z0.d, z0.d, lsl #3]
+# CHECK-NEXT: 1 2 0.25 adr z0.d, [z0.d, z0.d, sxtw #1]
+# CHECK-NEXT: 1 2 0.25 adr z0.d, [z0.d, z0.d, sxtw #2]
+# CHECK-NEXT: 1 2 0.25 adr z0.d, [z0.d, z0.d, sxtw #3]
+# CHECK-NEXT: 1 2 0.25 adr z0.d, [z0.d, z0.d, sxtw]
+# CHECK-NEXT: 1 2 0.25 adr z0.d, [z0.d, z0.d, uxtw #1]
+# CHECK-NEXT: 1 2 0.25 adr z0.d, [z0.d, z0.d, uxtw #2]
+# CHECK-NEXT: 1 2 0.25 adr z0.d, [z0.d, z0.d, uxtw #3]
+# CHECK-NEXT: 1 2 0.25 adr z0.d, [z0.d, z0.d, uxtw]
+# CHECK-NEXT: 1 2 0.25 adr z0.d, [z0.d, z0.d]
+# CHECK-NEXT: 1 2 0.25 adr z0.s, [z0.s, z0.s, lsl #1]
+# CHECK-NEXT: 1 2 0.25 adr z0.s, [z0.s, z0.s, lsl #2]
+# CHECK-NEXT: 1 2 0.25 adr z0.s, [z0.s, z0.s, lsl #3]
+# CHECK-NEXT: 1 2 0.25 adr z0.s, [z0.s, z0.s]
+# CHECK-NEXT: 1 2 0.25 aesd z0.b, z0.b, z31.b
+# CHECK-NEXT: 1 2 0.25 aese z0.b, z0.b, z31.b
+# CHECK-NEXT: 1 2 0.25 aesimc z0.b, z0.b
+# CHECK-NEXT: 1 2 0.25 aesimc z31.b, z31.b
+# CHECK-NEXT: 1 2 0.25 aesmc z0.b, z0.b
+# CHECK-NEXT: 1 2 0.25 aesmc z31.b, z31.b
+# CHECK-NEXT: 1 2 0.50 and p0.b, p0/z, p0.b, p1.b
+# CHECK-NEXT: 1 2 0.25 and z0.d, z0.d, #0x6
+# CHECK-NEXT: 1 2 0.25 and z0.d, z0.d, #0xfffffffffffffff9
+# CHECK-NEXT: 1 2 0.25 and z0.d, z0.d, z0.d
+# CHECK-NEXT: 1 2 0.25 and z0.s, z0.s, #0x6
+# CHECK-NEXT: 1 2 0.25 and z0.s, z0.s, #0xfffffff9
+# CHECK-NEXT: 1 2 0.25 and z23.d, z13.d, z8.d
+# CHECK-NEXT: 1 2 0.25 and z23.h, z23.h, #0x6
+# CHECK-NEXT: 1 2 0.25 and z23.h, z23.h, #0xfff9
+# CHECK-NEXT: 1 2 0.25 and z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: 1 2 0.25 and z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: 1 2 0.25 and z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: 1 2 0.25 and z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.25 and z5.b, z5.b, #0x6
+# CHECK-NEXT: 1 2 0.25 and z5.b, z5.b, #0xf9
+# CHECK-NEXT: 1 2 0.50 ands p0.b, p0/z, p0.b, p1.b
+# CHECK-NEXT: 2 6 0.50 andv b0, p7, z31.b
+# CHECK-NEXT: 2 6 0.50 andv d0, p7, z31.d
+# CHECK-NEXT: 2 6 0.50 andv h0, p7, z31.h
+# CHECK-NEXT: 2 6 0.50 andv s0, p7, z31.s
+# CHECK-NEXT: 1 2 0.50 asr z0.b, p0/m, z0.b, #1
+# CHECK-NEXT: 1 2 0.50 asr z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: 1 2 0.50 asr z0.b, p0/m, z0.b, z1.d
+# CHECK-NEXT: 1 2 0.50 asr z0.b, z0.b, #1
+# CHECK-NEXT: 1 2 0.50 asr z0.b, z1.b, z2.d
+# CHECK-NEXT: 1 2 0.50 asr z0.d, p0/m, z0.d, #1
+# CHECK-NEXT: 1 2 0.50 asr z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: 1 2 0.50 asr z0.d, z0.d, #1
+# CHECK-NEXT: 1 2 0.50 asr z0.h, p0/m, z0.h, #1
+# CHECK-NEXT: 1 2 0.50 asr z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: 1 2 0.50 asr z0.h, p0/m, z0.h, z1.d
+# CHECK-NEXT: 1 2 0.50 asr z0.h, z0.h, #1
+# CHECK-NEXT: 1 2 0.50 asr z0.h, z1.h, z2.d
+# CHECK-NEXT: 1 2 0.50 asr z0.s, p0/m, z0.s, #1
+# CHECK-NEXT: 1 2 0.50 asr z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: 1 2 0.50 asr z0.s, p0/m, z0.s, z1.d
+# CHECK-NEXT: 1 2 0.50 asr z0.s, z0.s, #1
+# CHECK-NEXT: 1 2 0.50 asr z0.s, z1.s, z2.d
+# CHECK-NEXT: 1 2 0.50 asr z31.b, p0/m, z31.b, #8
+# CHECK-NEXT: 1 2 0.50 asr z31.b, z31.b, #8
+# CHECK-NEXT: 1 2 0.50 asr z31.d, p0/m, z31.d, #64
+# CHECK-NEXT: 1 2 0.50 asr z31.d, z31.d, #64
+# CHECK-NEXT: 1 2 0.50 asr z31.h, p0/m, z31.h, #16
+# CHECK-NEXT: 1 2 0.50 asr z31.h, z31.h, #16
+# CHECK-NEXT: 1 2 0.50 asr z31.s, p0/m, z31.s, #32
+# CHECK-NEXT: 1 2 0.50 asr z31.s, z31.s, #32
+# CHECK-NEXT: 1 4 0.25 asrd z0.b, p0/m, z0.b, #1
+# CHECK-NEXT: 1 4 0.25 asrd z0.d, p0/m, z0.d, #1
+# CHECK-NEXT: 1 4 0.25 asrd z0.h, p0/m, z0.h, #1
+# CHECK-NEXT: 1 4 0.25 asrd z0.s, p0/m, z0.s, #1
+# CHECK-NEXT: 1 4 0.25 asrd z31.b, p0/m, z31.b, #8
+# CHECK-NEXT: 1 4 0.25 asrd z31.d, p0/m, z31.d, #64
+# CHECK-NEXT: 1 4 0.25 asrd z31.h, p0/m, z31.h, #16
+# CHECK-NEXT: 1 4 0.25 asrd z31.s, p0/m, z31.s, #32
+# CHECK-NEXT: 1 2 0.50 asrr z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: 1 2 0.50 asrr z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: 1 2 0.50 asrr z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: 1 2 0.50 asrr z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: 1 2 0.25 bcax z29.d, z29.d, z30.d, z31.d
+# CHECK-NEXT: 2 6 2.00 bdep z0.b, z1.b, z31.b
+# CHECK-NEXT: 2 6 2.00 bdep z0.d, z1.d, z31.d
+# CHECK-NEXT: 2 6 2.00 bdep z0.h, z1.h, z31.h
+# CHECK-NEXT: 2 6 2.00 bdep z0.s, z1.s, z31.s
+# CHECK-NEXT: 2 6 2.00 bext z0.b, z1.b, z31.b
+# CHECK-NEXT: 2 6 2.00 bext z0.d, z1.d, z31.d
+# CHECK-NEXT: 2 6 2.00 bext z0.h, z1.h, z31.h
+# CHECK-NEXT: 2 6 2.00 bext z0.s, z1.s, z31.s
+# CHECK-NEXT: 1 4 0.50 bfcvt z0.h, p0/m, z1.s
+# CHECK-NEXT: 1 4 0.50 bfcvtnt z0.h, p0/m, z1.s
+# CHECK-NEXT: 1 5 0.25 bfdot z0.s, z1.h, z2.h
+# CHECK-NEXT: 1 5 0.25 bfdot z0.s, z1.h, z2.h[0]
+# CHECK-NEXT: 1 5 0.25 bfdot z0.s, z1.h, z2.h[3]
+# CHECK-NEXT: 1 5 0.25 bfmlalb z0.s, z1.h, z2.h
+# CHECK-NEXT: 1 5 0.25 bfmlalb z0.s, z1.h, z2.h[0]
+# CHECK-NEXT: 1 5 0.25 bfmlalb z0.s, z1.h, z2.h[7]
+# CHECK-NEXT: 1 5 0.25 bfmlalb z10.s, z21.h, z14.h
+# CHECK-NEXT: 1 5 0.25 bfmlalb z21.s, z14.h, z3.h[2]
+# CHECK-NEXT: 1 5 0.25 bfmlalt z0.s, z1.h, z2.h
+# CHECK-NEXT: 1 5 0.25 bfmlalt z0.s, z1.h, z2.h[0]
+# CHECK-NEXT: 1 5 0.25 bfmlalt z0.s, z1.h, z2.h[7]
+# CHECK-NEXT: 1 5 0.25 bfmlalt z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: 1 5 0.25 bfmlalt z14.s, z10.h, z21.h
+# CHECK-NEXT: 1 6 0.25 bfmmla z0.s, z1.h, z2.h
+# CHECK-NEXT: 2 6 2.00 bgrp z0.b, z1.b, z31.b
+# CHECK-NEXT: 2 6 2.00 bgrp z0.d, z1.d, z31.d
+# CHECK-NEXT: 2 6 2.00 bgrp z0.h, z1.h, z31.h
+# CHECK-NEXT: 2 6 2.00 bgrp z0.s, z1.s, z31.s
+# CHECK-NEXT: 1 2 0.50 bic p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: 1 2 0.50 bic p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 1 2 0.25 bic z0.d, z0.d, z0.d
+# CHECK-NEXT: 1 2 0.25 bic z23.d, z13.d, z8.d
+# CHECK-NEXT: 1 2 0.25 bic z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: 1 2 0.25 bic z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: 1 2 0.25 bic z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: 1 2 0.25 bic z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.50 bics p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: 1 2 0.50 bics p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 1 2 0.50 brka p0.b, p15/m, p15.b
+# CHECK-NEXT: 1 2 0.50 brka p0.b, p15/z, p15.b
+# CHECK-NEXT: 1 2 0.50 brkas p0.b, p15/z, p15.b
+# CHECK-NEXT: 1 2 0.50 brkb p0.b, p15/m, p15.b
+# CHECK-NEXT: 1 2 0.50 brkb p0.b, p15/z, p15.b
+# CHECK-NEXT: 1 2 0.50 brkbs p0.b, p15/z, p15.b
+# CHECK-NEXT: 1 2 0.50 brkn p0.b, p15/z, p1.b, p0.b
+# CHECK-NEXT: 1 3 0.50 brkn p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 1 2 0.50 brkns p0.b, p15/z, p1.b, p0.b
+# CHECK-NEXT: 1 3 0.50 brkns p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 1 2 0.50 brkpa p0.b, p15/z, p1.b, p2.b
+# CHECK-NEXT: 1 3 0.50 brkpa p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 1 2 0.50 brkpas p0.b, p15/z, p1.b, p2.b
+# CHECK-NEXT: 1 3 0.50 brkpas p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 1 2 0.50 brkpb p0.b, p15/z, p1.b, p2.b
+# CHECK-NEXT: 1 3 0.50 brkpb p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 1 2 0.50 brkpbs p0.b, p15/z, p1.b, p2.b
+# CHECK-NEXT: 1 3 0.50 brkpbs p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 1 2 0.25 bsl z0.d, z0.d, z1.d, z2.d
+# CHECK-NEXT: 1 2 0.25 bsl1n z0.d, z0.d, z1.d, z2.d
+# CHECK-NEXT: 1 2 0.25 bsl2n z0.d, z0.d, z1.d, z2.d
+# CHECK-NEXT: 1 2 0.25 cadd z0.b, z0.b, z0.b, #90
+# CHECK-NEXT: 1 2 0.25 cadd z0.d, z0.d, z0.d, #90
+# CHECK-NEXT: 1 2 0.25 cadd z0.h, z0.h, z0.h, #90
+# CHECK-NEXT: 1 2 0.25 cadd z0.s, z0.s, z0.s, #90
+# CHECK-NEXT: 1 2 0.25 cadd z31.b, z31.b, z31.b, #270
+# CHECK-NEXT: 1 2 0.25 cadd z31.d, z31.d, z31.d, #270
+# CHECK-NEXT: 1 2 0.25 cadd z31.h, z31.h, z31.h, #270
+# CHECK-NEXT: 1 2 0.25 cadd z31.s, z31.s, z31.s, #270
+# CHECK-NEXT: 1 3 0.50 cdot z0.d, z1.h, z15.h[1], #0
+# CHECK-NEXT: 1 3 0.50 cdot z0.d, z1.h, z31.h, #0
+# CHECK-NEXT: 1 3 0.50 cdot z0.d, z1.h, z31.h, #180
+# CHECK-NEXT: 1 3 0.50 cdot z0.d, z1.h, z31.h, #270
+# CHECK-NEXT: 1 3 0.50 cdot z0.d, z1.h, z31.h, #90
+# CHECK-NEXT: 1 3 0.25 cdot z0.s, z1.b, z31.b, #0
+# CHECK-NEXT: 1 3 0.25 cdot z0.s, z1.b, z7.b[3], #0
+# CHECK-NEXT: 1 3 0.50 cdot z29.d, z30.h, z0.h[0], #180
+# CHECK-NEXT: 1 3 0.50 cdot z31.d, z30.h, z7.h[1], #270
+# CHECK-NEXT: 1 3 0.50 cdot z5.d, z6.h, z3.h[0], #90
+# CHECK-NEXT: 1 3 1.00 clasta b0, p7, b0, z31.b
+# CHECK-NEXT: 1 3 1.00 clasta d0, p7, d0, z31.d
+# CHECK-NEXT: 1 3 1.00 clasta h0, p7, h0, z31.h
+# CHECK-NEXT: 1 3 1.00 clasta s0, p7, s0, z31.s
+# CHECK-NEXT: 2 8 1.00 clasta w0, p7, w0, z31.b
+# CHECK-NEXT: 2 8 1.00 clasta w0, p7, w0, z31.h
+# CHECK-NEXT: 2 8 1.00 clasta w0, p7, w0, z31.s
+# CHECK-NEXT: 2 8 1.00 clasta x0, p7, x0, z31.d
+# CHECK-NEXT: 1 3 1.00 clasta z0.b, p7, z0.b, z31.b
+# CHECK-NEXT: 1 3 1.00 clasta z0.d, p7, z0.d, z31.d
+# CHECK-NEXT: 1 3 1.00 clasta z0.h, p7, z0.h, z31.h
+# CHECK-NEXT: 1 3 1.00 clasta z0.s, p7, z0.s, z31.s
+# CHECK-NEXT: 1 3 1.00 clastb b0, p7, b0, z31.b
+# CHECK-NEXT: 1 3 1.00 clastb d0, p7, d0, z31.d
+# CHECK-NEXT: 1 3 1.00 clastb h0, p7, h0, z31.h
+# CHECK-NEXT: 1 3 1.00 clastb s0, p7, s0, z31.s
+# CHECK-NEXT: 2 8 1.00 clastb w0, p7, w0, z31.b
+# CHECK-NEXT: 2 8 1.00 clastb w0, p7, w0, z31.h
+# CHECK-NEXT: 2 8 1.00 clastb w0, p7, w0, z31.s
+# CHECK-NEXT: 2 8 1.00 clastb x0, p7, x0, z31.d
+# CHECK-NEXT: 1 3 1.00 clastb z0.b, p7, z0.b, z31.b
+# CHECK-NEXT: 1 3 1.00 clastb z0.d, p7, z0.d, z31.d
+# CHECK-NEXT: 1 3 1.00 clastb z0.h, p7, z0.h, z31.h
+# CHECK-NEXT: 1 3 1.00 clastb z0.s, p7, z0.s, z31.s
+# CHECK-NEXT: 1 2 0.25 cls z31.b, p7/m, z31.b
+# CHECK-NEXT: 1 2 0.25 cls z31.d, p7/m, z31.d
+# CHECK-NEXT: 1 2 0.25 cls z31.h, p7/m, z31.h
+# CHECK-NEXT: 1 2 0.25 cls z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 2 0.25 clz z31.b, p7/m, z31.b
+# CHECK-NEXT: 1 2 0.25 clz z31.d, p7/m, z31.d
+# CHECK-NEXT: 1 2 0.25 clz z31.h, p7/m, z31.h
+# CHECK-NEXT: 1 2 0.25 clz z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 4 0.50 cmla z0.b, z1.b, z2.b, #0
+# CHECK-NEXT: 1 5 1.00 cmla z0.d, z1.d, z2.d, #0
+# CHECK-NEXT: 1 4 0.50 cmla z0.h, z1.h, z2.h, #0
+# CHECK-NEXT: 1 4 0.50 cmla z0.h, z1.h, z2.h[0], #0
+# CHECK-NEXT: 1 4 0.50 cmla z0.s, z1.s, z2.s, #0
+# CHECK-NEXT: 1 4 0.50 cmla z0.s, z1.s, z2.s[0], #0
+# CHECK-NEXT: 1 4 0.50 cmla z15.b, z16.b, z17.b, #270
+# CHECK-NEXT: 1 5 1.00 cmla z15.d, z16.d, z17.d, #270
+# CHECK-NEXT: 1 4 0.50 cmla z15.h, z16.h, z17.h, #270
+# CHECK-NEXT: 1 4 0.50 cmla z15.s, z16.s, z17.s, #270
+# CHECK-NEXT: 1 4 0.50 cmla z29.b, z30.b, z31.b, #90
+# CHECK-NEXT: 1 5 1.00 cmla z29.d, z30.d, z31.d, #90
+# CHECK-NEXT: 1 4 0.50 cmla z29.h, z30.h, z31.h, #90
+# CHECK-NEXT: 1 4 0.50 cmla z29.s, z30.s, z31.s, #90
+# CHECK-NEXT: 1 4 0.50 cmla z31.b, z31.b, z31.b, #180
+# CHECK-NEXT: 1 5 1.00 cmla z31.d, z31.d, z31.d, #180
+# CHECK-NEXT: 1 4 0.50 cmla z31.h, z30.h, z7.h[0], #180
+# CHECK-NEXT: 1 4 0.50 cmla z31.h, z31.h, z31.h, #180
+# CHECK-NEXT: 1 4 0.50 cmla z31.s, z30.s, z7.s[0], #180
+# CHECK-NEXT: 1 4 0.50 cmla z31.s, z31.s, z31.s, #180
+# CHECK-NEXT: 1 3 1.00 cmpeq p0.b, p0/z, z0.b, #-16
+# CHECK-NEXT: 1 3 1.00 cmpeq p0.b, p0/z, z0.b, #15
+# CHECK-NEXT: 1 3 1.00 cmpeq p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: 1 3 1.00 cmpeq p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpeq p0.d, p0/z, z0.d, #-16
+# CHECK-NEXT: 1 3 1.00 cmpeq p0.d, p0/z, z0.d, #15
+# CHECK-NEXT: 1 3 1.00 cmpeq p0.d, p0/z, z0.d, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpeq p0.h, p0/z, z0.h, #-16
+# CHECK-NEXT: 1 3 1.00 cmpeq p0.h, p0/z, z0.h, #15
+# CHECK-NEXT: 1 3 1.00 cmpeq p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpeq p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: 1 3 1.00 cmpeq p0.s, p0/z, z0.s, #-16
+# CHECK-NEXT: 1 3 1.00 cmpeq p0.s, p0/z, z0.s, #15
+# CHECK-NEXT: 1 3 1.00 cmpeq p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpeq p0.s, p0/z, z0.s, z0.s
+# CHECK-NEXT: 1 3 1.00 cmpge p0.b, p0/z, z0.b, #-16
+# CHECK-NEXT: 1 3 1.00 cmpge p0.b, p0/z, z0.b, #15
+# CHECK-NEXT: 1 3 1.00 cmpge p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: 1 3 1.00 cmpge p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpge p0.b, p0/z, z1.b, z0.b
+# CHECK-NEXT: 1 3 1.00 cmpge p0.d, p0/z, z0.d, #-16
+# CHECK-NEXT: 1 3 1.00 cmpge p0.d, p0/z, z0.d, #15
+# CHECK-NEXT: 1 3 1.00 cmpge p0.d, p0/z, z0.d, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpge p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpge p0.h, p0/z, z0.h, #-16
+# CHECK-NEXT: 1 3 1.00 cmpge p0.h, p0/z, z0.h, #15
+# CHECK-NEXT: 1 3 1.00 cmpge p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpge p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: 1 3 1.00 cmpge p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: 1 3 1.00 cmpge p0.s, p0/z, z0.s, #-16
+# CHECK-NEXT: 1 3 1.00 cmpge p0.s, p0/z, z0.s, #15
+# CHECK-NEXT: 1 3 1.00 cmpge p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpge p0.s, p0/z, z0.s, z0.s
+# CHECK-NEXT: 1 3 1.00 cmpge p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: 1 3 1.00 cmpgt p0.b, p0/z, z0.b, #-16
+# CHECK-NEXT: 1 3 1.00 cmpgt p0.b, p0/z, z0.b, #15
+# CHECK-NEXT: 1 3 1.00 cmpgt p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: 1 3 1.00 cmpgt p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpgt p0.b, p0/z, z1.b, z0.b
+# CHECK-NEXT: 1 3 1.00 cmpgt p0.d, p0/z, z0.d, #-16
+# CHECK-NEXT: 1 3 1.00 cmpgt p0.d, p0/z, z0.d, #15
+# CHECK-NEXT: 1 3 1.00 cmpgt p0.d, p0/z, z0.d, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpgt p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpgt p0.h, p0/z, z0.h, #-16
+# CHECK-NEXT: 1 3 1.00 cmpgt p0.h, p0/z, z0.h, #15
+# CHECK-NEXT: 1 3 1.00 cmpgt p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpgt p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: 1 3 1.00 cmpgt p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: 1 3 1.00 cmpgt p0.s, p0/z, z0.s, #-16
+# CHECK-NEXT: 1 3 1.00 cmpgt p0.s, p0/z, z0.s, #15
+# CHECK-NEXT: 1 3 1.00 cmpgt p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpgt p0.s, p0/z, z0.s, z0.s
+# CHECK-NEXT: 1 3 1.00 cmpgt p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: 1 3 1.00 cmphi p0.b, p0/z, z0.b, #0
+# CHECK-NEXT: 1 3 1.00 cmphi p0.b, p0/z, z0.b, #127
+# CHECK-NEXT: 1 3 1.00 cmphi p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: 1 3 1.00 cmphi p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: 1 3 1.00 cmphi p0.b, p0/z, z1.b, z0.b
+# CHECK-NEXT: 1 3 1.00 cmphi p0.d, p0/z, z0.d, #0
+# CHECK-NEXT: 1 3 1.00 cmphi p0.d, p0/z, z0.d, #127
+# CHECK-NEXT: 1 3 1.00 cmphi p0.d, p0/z, z0.d, z0.d
+# CHECK-NEXT: 1 3 1.00 cmphi p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: 1 3 1.00 cmphi p0.h, p0/z, z0.h, #0
+# CHECK-NEXT: 1 3 1.00 cmphi p0.h, p0/z, z0.h, #127
+# CHECK-NEXT: 1 3 1.00 cmphi p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: 1 3 1.00 cmphi p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: 1 3 1.00 cmphi p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: 1 3 1.00 cmphi p0.s, p0/z, z0.s, #0
+# CHECK-NEXT: 1 3 1.00 cmphi p0.s, p0/z, z0.s, #127
+# CHECK-NEXT: 1 3 1.00 cmphi p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: 1 3 1.00 cmphi p0.s, p0/z, z0.s, z0.s
+# CHECK-NEXT: 1 3 1.00 cmphi p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: 1 3 1.00 cmphs p0.b, p0/z, z0.b, #0
+# CHECK-NEXT: 1 3 1.00 cmphs p0.b, p0/z, z0.b, #127
+# CHECK-NEXT: 1 3 1.00 cmphs p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: 1 3 1.00 cmphs p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: 1 3 1.00 cmphs p0.b, p0/z, z1.b, z0.b
+# CHECK-NEXT: 1 3 1.00 cmphs p0.d, p0/z, z0.d, #0
+# CHECK-NEXT: 1 3 1.00 cmphs p0.d, p0/z, z0.d, #127
+# CHECK-NEXT: 1 3 1.00 cmphs p0.d, p0/z, z0.d, z0.d
+# CHECK-NEXT: 1 3 1.00 cmphs p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: 1 3 1.00 cmphs p0.h, p0/z, z0.h, #0
+# CHECK-NEXT: 1 3 1.00 cmphs p0.h, p0/z, z0.h, #127
+# CHECK-NEXT: 1 3 1.00 cmphs p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: 1 3 1.00 cmphs p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: 1 3 1.00 cmphs p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: 1 3 1.00 cmphs p0.s, p0/z, z0.s, #0
+# CHECK-NEXT: 1 3 1.00 cmphs p0.s, p0/z, z0.s, #127
+# CHECK-NEXT: 1 3 1.00 cmphs p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: 1 3 1.00 cmphs p0.s, p0/z, z0.s, z0.s
+# CHECK-NEXT: 1 3 1.00 cmphs p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: 1 3 1.00 cmple p0.b, p0/z, z0.b, #-16
+# CHECK-NEXT: 1 3 1.00 cmple p0.b, p0/z, z0.b, #15
+# CHECK-NEXT: 1 3 1.00 cmple p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: 1 3 1.00 cmple p0.d, p0/z, z0.d, #-16
+# CHECK-NEXT: 1 3 1.00 cmple p0.d, p0/z, z0.d, #15
+# CHECK-NEXT: 1 3 1.00 cmple p0.h, p0/z, z0.h, #-16
+# CHECK-NEXT: 1 3 1.00 cmple p0.h, p0/z, z0.h, #15
+# CHECK-NEXT: 1 3 1.00 cmple p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: 1 3 1.00 cmple p0.s, p0/z, z0.s, #-16
+# CHECK-NEXT: 1 3 1.00 cmple p0.s, p0/z, z0.s, #15
+# CHECK-NEXT: 1 3 1.00 cmple p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: 1 3 1.00 cmplo p0.b, p0/z, z0.b, #0
+# CHECK-NEXT: 1 3 1.00 cmplo p0.b, p0/z, z0.b, #127
+# CHECK-NEXT: 1 3 1.00 cmplo p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: 1 3 1.00 cmplo p0.d, p0/z, z0.d, #0
+# CHECK-NEXT: 1 3 1.00 cmplo p0.d, p0/z, z0.d, #127
+# CHECK-NEXT: 1 3 1.00 cmplo p0.h, p0/z, z0.h, #0
+# CHECK-NEXT: 1 3 1.00 cmplo p0.h, p0/z, z0.h, #127
+# CHECK-NEXT: 1 3 1.00 cmplo p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: 1 3 1.00 cmplo p0.s, p0/z, z0.s, #0
+# CHECK-NEXT: 1 3 1.00 cmplo p0.s, p0/z, z0.s, #127
+# CHECK-NEXT: 1 3 1.00 cmplo p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpls p0.b, p0/z, z0.b, #0
+# CHECK-NEXT: 1 3 1.00 cmpls p0.b, p0/z, z0.b, #127
+# CHECK-NEXT: 1 3 1.00 cmpls p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpls p0.d, p0/z, z0.d, #0
+# CHECK-NEXT: 1 3 1.00 cmpls p0.d, p0/z, z0.d, #127
+# CHECK-NEXT: 1 3 1.00 cmpls p0.h, p0/z, z0.h, #0
+# CHECK-NEXT: 1 3 1.00 cmpls p0.h, p0/z, z0.h, #127
+# CHECK-NEXT: 1 3 1.00 cmpls p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpls p0.s, p0/z, z0.s, #0
+# CHECK-NEXT: 1 3 1.00 cmpls p0.s, p0/z, z0.s, #127
+# CHECK-NEXT: 1 3 1.00 cmpls p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: 1 3 1.00 cmplt p0.b, p0/z, z0.b, #-16
+# CHECK-NEXT: 1 3 1.00 cmplt p0.b, p0/z, z0.b, #15
+# CHECK-NEXT: 1 3 1.00 cmplt p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: 1 3 1.00 cmplt p0.d, p0/z, z0.d, #-16
+# CHECK-NEXT: 1 3 1.00 cmplt p0.d, p0/z, z0.d, #15
+# CHECK-NEXT: 1 3 1.00 cmplt p0.h, p0/z, z0.h, #-16
+# CHECK-NEXT: 1 3 1.00 cmplt p0.h, p0/z, z0.h, #15
+# CHECK-NEXT: 1 3 1.00 cmplt p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: 1 3 1.00 cmplt p0.s, p0/z, z0.s, #-16
+# CHECK-NEXT: 1 3 1.00 cmplt p0.s, p0/z, z0.s, #15
+# CHECK-NEXT: 1 3 1.00 cmplt p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpne p0.b, p0/z, z0.b, #-16
+# CHECK-NEXT: 1 3 1.00 cmpne p0.b, p0/z, z0.b, #15
+# CHECK-NEXT: 1 3 1.00 cmpne p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: 1 3 1.00 cmpne p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpne p0.d, p0/z, z0.d, #-16
+# CHECK-NEXT: 1 3 1.00 cmpne p0.d, p0/z, z0.d, #15
+# CHECK-NEXT: 1 3 1.00 cmpne p0.d, p0/z, z0.d, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpne p0.h, p0/z, z0.h, #-16
+# CHECK-NEXT: 1 3 1.00 cmpne p0.h, p0/z, z0.h, #15
+# CHECK-NEXT: 1 3 1.00 cmpne p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpne p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: 1 3 1.00 cmpne p0.s, p0/z, z0.s, #-16
+# CHECK-NEXT: 1 3 1.00 cmpne p0.s, p0/z, z0.s, #15
+# CHECK-NEXT: 1 3 1.00 cmpne p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpne p0.s, p0/z, z0.s, z0.s
+# CHECK-NEXT: 1 2 0.25 cnot z31.b, p7/m, z31.b
+# CHECK-NEXT: 1 2 0.25 cnot z31.d, p7/m, z31.d
+# CHECK-NEXT: 1 2 0.25 cnot z31.h, p7/m, z31.h
+# CHECK-NEXT: 1 2 0.25 cnot z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 2 0.25 cnt z31.b, p7/m, z31.b
+# CHECK-NEXT: 1 2 0.25 cnt z31.d, p7/m, z31.d
+# CHECK-NEXT: 1 2 0.25 cnt z31.h, p7/m, z31.h
+# CHECK-NEXT: 1 2 0.25 cnt z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 2 0.50 cntb x0
+# CHECK-NEXT: 1 2 0.50 cntb x0, #28
+# CHECK-NEXT: 1 2 0.50 cntb x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 cntb x0, pow2
+# CHECK-NEXT: 1 2 0.50 cntd x0
+# CHECK-NEXT: 1 2 0.50 cntd x0, #28
+# CHECK-NEXT: 1 2 0.50 cntd x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 cntd x0, pow2
+# CHECK-NEXT: 1 2 0.50 cnth x0
+# CHECK-NEXT: 1 2 0.50 cnth x0, #28
+# CHECK-NEXT: 1 2 0.50 cnth x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 cnth x0, pow2
+# CHECK-NEXT: 1 2 0.50 cntp x0, p15, p0.b
+# CHECK-NEXT: 1 2 0.50 cntp x0, p15, p0.d
+# CHECK-NEXT: 1 2 0.50 cntp x0, p15, p0.h
+# CHECK-NEXT: 1 2 0.50 cntp x0, p15, p0.s
+# CHECK-NEXT: 1 2 0.50 cntw x0
+# CHECK-NEXT: 1 2 0.50 cntw x0, #28
+# CHECK-NEXT: 1 2 0.50 cntw x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 cntw x0, pow2
+# CHECK-NEXT: 1 3 1.00 compact z31.d, p7, z31.d
+# CHECK-NEXT: 1 3 1.00 compact z31.s, p7, z31.s
+# CHECK-NEXT: 2 1 1.00 ctermeq w30, wzr
+# CHECK-NEXT: 2 1 1.00 ctermeq wzr, w30
+# CHECK-NEXT: 2 1 1.00 ctermeq x30, xzr
+# CHECK-NEXT: 2 1 1.00 ctermeq xzr, x30
+# CHECK-NEXT: 2 1 1.00 ctermne w30, wzr
+# CHECK-NEXT: 2 1 1.00 ctermne wzr, w30
+# CHECK-NEXT: 2 1 1.00 ctermne x30, xzr
+# CHECK-NEXT: 2 1 1.00 ctermne xzr, x30
+# CHECK-NEXT: 1 1 0.13 decb x0
+# CHECK-NEXT: 1 2 0.50 decb x0, #14
+# CHECK-NEXT: 1 2 0.50 decb x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 decb x0, pow2
+# CHECK-NEXT: 1 2 0.50 decb x0, vl1
+# CHECK-NEXT: 1 1 0.13 decd x0
+# CHECK-NEXT: 1 2 0.50 decd x0, #14
+# CHECK-NEXT: 1 2 0.50 decd x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 decd x0, pow2
+# CHECK-NEXT: 1 2 0.50 decd x0, vl1
+# CHECK-NEXT: 1 1 0.13 dech x0
+# CHECK-NEXT: 1 2 0.50 dech x0, #14
+# CHECK-NEXT: 1 2 0.50 dech x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 dech x0, pow2
+# CHECK-NEXT: 1 2 0.50 dech x0, vl1
+# CHECK-NEXT: 1 2 0.50 decp x0, p0.b
+# CHECK-NEXT: 1 2 0.50 decp x0, p0.d
+# CHECK-NEXT: 1 2 0.50 decp x0, p0.h
+# CHECK-NEXT: 1 2 0.50 decp x0, p0.s
+# CHECK-NEXT: 1 2 0.50 decp xzr, p15.b
+# CHECK-NEXT: 1 2 0.50 decp xzr, p15.d
+# CHECK-NEXT: 1 2 0.50 decp xzr, p15.h
+# CHECK-NEXT: 1 2 0.50 decp xzr, p15.s
+# CHECK-NEXT: 3 7 1.00 decp z31.d, p15.d
+# CHECK-NEXT: 3 7 1.00 decp z31.h, p15.h
+# CHECK-NEXT: 3 7 1.00 decp z31.s, p15.s
+# CHECK-NEXT: 1 1 0.13 decw x0
+# CHECK-NEXT: 1 2 0.50 decw x0, #14
+# CHECK-NEXT: 1 2 0.50 decw x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 decw x0, pow2
+# CHECK-NEXT: 1 2 0.50 decw x0, vl1
+# CHECK-NEXT: 1 2 0.25 dupm z0.d, #0xfffffffffffffff9
+# CHECK-NEXT: 1 2 0.25 dupm z0.s, #0xfffffff9
+# CHECK-NEXT: 1 2 0.25 dupm z23.h, #0xfff9
+# CHECK-NEXT: 1 2 0.25 dupm z5.b, #0xf9
+# CHECK-NEXT: 1 2 0.50 eor p0.b, p0/z, p0.b, p1.b
+# CHECK-NEXT: 1 2 0.25 eor z0.d, z0.d, #0x6
+# CHECK-NEXT: 1 2 0.25 eor z0.d, z0.d, #0xfffffffffffffff9
+# CHECK-NEXT: 1 2 0.25 eor z0.d, z0.d, z0.d
+# CHECK-NEXT: 1 2 0.25 eor z0.s, z0.s, #0x6
+# CHECK-NEXT: 1 2 0.25 eor z0.s, z0.s, #0xfffffff9
+# CHECK-NEXT: 1 2 0.25 eor z23.d, z13.d, z8.d
+# CHECK-NEXT: 1 2 0.25 eor z23.h, z23.h, #0x6
+# CHECK-NEXT: 1 2 0.25 eor z23.h, z23.h, #0xfff9
+# CHECK-NEXT: 1 2 0.25 eor z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: 1 2 0.25 eor z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: 1 2 0.25 eor z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: 1 2 0.25 eor z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.25 eor z5.b, z5.b, #0x6
+# CHECK-NEXT: 1 2 0.25 eor z5.b, z5.b, #0xf9
+# CHECK-NEXT: 1 2 0.25 eor3 z29.d, z29.d, z30.d, z31.d
+# CHECK-NEXT: 1 2 0.25 eorbt z0.b, z1.b, z31.b
+# CHECK-NEXT: 1 2 0.25 eorbt z0.d, z1.d, z31.d
+# CHECK-NEXT: 1 2 0.25 eorbt z0.h, z1.h, z31.h
+# CHECK-NEXT: 1 2 0.25 eorbt z0.s, z1.s, z31.s
+# CHECK-NEXT: 1 2 0.50 eors p0.b, p0/z, p0.b, p1.b
+# CHECK-NEXT: 1 2 0.25 eortb z0.b, z1.b, z31.b
+# CHECK-NEXT: 1 2 0.25 eortb z0.d, z1.d, z31.d
+# CHECK-NEXT: 1 2 0.25 eortb z0.h, z1.h, z31.h
+# CHECK-NEXT: 1 2 0.25 eortb z0.s, z1.s, z31.s
+# CHECK-NEXT: 2 6 0.50 eorv b0, p7, z31.b
+# CHECK-NEXT: 2 6 0.50 eorv d0, p7, z31.d
+# CHECK-NEXT: 2 6 0.50 eorv h0, p7, z31.h
+# CHECK-NEXT: 2 6 0.50 eorv s0, p7, z31.s
+# CHECK-NEXT: 1 2 0.25 ext z0.b, { z1.b, z2.b }, #0
+# CHECK-NEXT: 1 2 0.25 ext z31.b, z31.b, z0.b, #0
+# CHECK-NEXT: 1 2 0.25 ext z31.b, z31.b, z0.b, #255
+# CHECK-NEXT: 1 2 0.25 ext z31.b, { z30.b, z31.b }, #255
+# CHECK-NEXT: 1 2 0.25 fabd z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 2 0.25 fabd z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 1 2 0.25 fabd z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 2 0.25 fabs z31.d, p7/m, z31.d
+# CHECK-NEXT: 1 2 0.25 fabs z31.h, p7/m, z31.h
+# CHECK-NEXT: 1 2 0.25 fabs z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 2 1.00 facge p0.d, p0/z, z0.d, z1.d
+# CHECK-NEXT: 1 2 1.00 facge p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: 1 2 1.00 facge p0.h, p0/z, z0.h, z1.h
+# CHECK-NEXT: 1 2 1.00 facge p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: 1 2 1.00 facge p0.s, p0/z, z0.s, z1.s
+# CHECK-NEXT: 1 2 1.00 facge p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: 1 2 1.00 facgt p0.d, p0/z, z0.d, z1.d
+# CHECK-NEXT: 1 2 1.00 facgt p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: 1 2 1.00 facgt p0.h, p0/z, z0.h, z1.h
+# CHECK-NEXT: 1 2 1.00 facgt p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: 1 2 1.00 facgt p0.s, p0/z, z0.s, z1.s
+# CHECK-NEXT: 1 2 1.00 facgt p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: 1 2 0.25 fadd z0.d, p0/m, z0.d, #0.5
+# CHECK-NEXT: 1 2 0.25 fadd z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 2 0.25 fadd z0.d, z1.d, z31.d
+# CHECK-NEXT: 1 2 0.25 fadd z0.h, p0/m, z0.h, #0.5
+# CHECK-NEXT: 1 2 0.25 fadd z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 1 2 0.25 fadd z0.h, z1.h, z31.h
+# CHECK-NEXT: 1 2 0.25 fadd z0.s, p0/m, z0.s, #0.5
+# CHECK-NEXT: 1 2 0.25 fadd z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 2 0.25 fadd z0.s, z1.s, z31.s
+# CHECK-NEXT: 1 2 0.25 fadd z31.d, p7/m, z31.d, #1.0
+# CHECK-NEXT: 1 2 0.25 fadd z31.h, p7/m, z31.h, #1.0
+# CHECK-NEXT: 1 2 0.25 fadd z31.s, p7/m, z31.s, #1.0
+# CHECK-NEXT: 1 4 0.25 fadda d0, p7, d0, z31.d
+# CHECK-NEXT: 1 10 9.00 fadda h0, p7, h0, z31.h
+# CHECK-NEXT: 1 6 5.00 fadda s0, p7, s0, z31.s
+# CHECK-NEXT: 1 2 0.25 faddp z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.25 faddp z29.s, p3/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.25 faddp z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 2 4 0.50 faddv d0, p7, z31.d
+# CHECK-NEXT: 4 8 1.00 faddv h0, p7, z31.h
+# CHECK-NEXT: 3 6 0.75 faddv s0, p7, z31.s
+# CHECK-NEXT: 1 3 0.25 fcadd z0.d, p0/m, z0.d, z0.d, #90
+# CHECK-NEXT: 1 3 0.25 fcadd z0.h, p0/m, z0.h, z0.h, #90
+# CHECK-NEXT: 1 3 0.25 fcadd z0.s, p0/m, z0.s, z0.s, #90
+# CHECK-NEXT: 1 3 0.25 fcadd z31.d, p7/m, z31.d, z31.d, #270
+# CHECK-NEXT: 1 3 0.25 fcadd z31.h, p7/m, z31.h, z31.h, #270
+# CHECK-NEXT: 1 3 0.25 fcadd z31.s, p7/m, z31.s, z31.s, #270
+# CHECK-NEXT: 1 2 1.00 fcmeq p0.d, p0/z, z0.d, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmeq p0.d, p0/z, z0.d, z1.d
+# CHECK-NEXT: 1 2 1.00 fcmeq p0.h, p0/z, z0.h, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmeq p0.h, p0/z, z0.h, z1.h
+# CHECK-NEXT: 1 2 1.00 fcmeq p0.s, p0/z, z0.s, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmeq p0.s, p0/z, z0.s, z1.s
+# CHECK-NEXT: 1 2 1.00 fcmge p0.d, p0/z, z0.d, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmge p0.d, p0/z, z0.d, z1.d
+# CHECK-NEXT: 1 2 1.00 fcmge p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: 1 2 1.00 fcmge p0.h, p0/z, z0.h, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmge p0.h, p0/z, z0.h, z1.h
+# CHECK-NEXT: 1 2 1.00 fcmge p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: 1 2 1.00 fcmge p0.s, p0/z, z0.s, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmge p0.s, p0/z, z0.s, z1.s
+# CHECK-NEXT: 1 2 1.00 fcmge p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: 1 2 1.00 fcmgt p0.d, p0/z, z0.d, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmgt p0.d, p0/z, z0.d, z1.d
+# CHECK-NEXT: 1 2 1.00 fcmgt p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: 1 2 1.00 fcmgt p0.h, p0/z, z0.h, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmgt p0.h, p0/z, z0.h, z1.h
+# CHECK-NEXT: 1 2 1.00 fcmgt p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: 1 2 1.00 fcmgt p0.s, p0/z, z0.s, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmgt p0.s, p0/z, z0.s, z1.s
+# CHECK-NEXT: 1 2 1.00 fcmgt p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: 1 5 0.25 fcmla z0.d, p0/m, z0.d, z0.d, #0
+# CHECK-NEXT: 1 5 0.25 fcmla z0.d, p0/m, z1.d, z2.d, #90
+# CHECK-NEXT: 1 5 0.25 fcmla z0.h, p0/m, z0.h, z0.h, #0
+# CHECK-NEXT: 1 5 0.25 fcmla z0.h, p0/m, z1.h, z2.h, #90
+# CHECK-NEXT: 1 5 0.25 fcmla z0.h, z0.h, z0.h[0], #0
+# CHECK-NEXT: 1 5 0.25 fcmla z0.s, p0/m, z0.s, z0.s, #0
+# CHECK-NEXT: 1 5 0.25 fcmla z0.s, p0/m, z1.s, z2.s, #90
+# CHECK-NEXT: 1 5 0.25 fcmla z21.s, z10.s, z5.s[1], #90
+# CHECK-NEXT: 1 5 0.25 fcmla z23.s, z13.s, z8.s[0], #270
+# CHECK-NEXT: 1 5 0.25 fcmla z29.d, p7/m, z30.d, z31.d, #180
+# CHECK-NEXT: 1 5 0.25 fcmla z29.h, p7/m, z30.h, z31.h, #180
+# CHECK-NEXT: 1 5 0.25 fcmla z29.s, p7/m, z30.s, z31.s, #180
+# CHECK-NEXT: 1 5 0.25 fcmla z31.d, p7/m, z31.d, z31.d, #270
+# CHECK-NEXT: 1 5 0.25 fcmla z31.h, p7/m, z31.h, z31.h, #270
+# CHECK-NEXT: 1 5 0.25 fcmla z31.h, z31.h, z7.h[3], #270
+# CHECK-NEXT: 1 5 0.25 fcmla z31.s, p7/m, z31.s, z31.s, #270
+# CHECK-NEXT: 1 2 1.00 fcmle p0.d, p0/z, z0.d, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmle p0.h, p0/z, z0.h, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmle p0.s, p0/z, z0.s, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmlt p0.d, p0/z, z0.d, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmlt p0.h, p0/z, z0.h, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmlt p0.s, p0/z, z0.s, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmne p0.d, p0/z, z0.d, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmne p0.d, p0/z, z0.d, z1.d
+# CHECK-NEXT: 1 2 1.00 fcmne p0.h, p0/z, z0.h, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmne p0.h, p0/z, z0.h, z1.h
+# CHECK-NEXT: 1 2 1.00 fcmne p0.s, p0/z, z0.s, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmne p0.s, p0/z, z0.s, z1.s
+# CHECK-NEXT: 1 2 1.00 fcmuo p0.d, p0/z, z0.d, z1.d
+# CHECK-NEXT: 1 2 1.00 fcmuo p0.h, p0/z, z0.h, z1.h
+# CHECK-NEXT: 1 2 1.00 fcmuo p0.s, p0/z, z0.s, z1.s
+# CHECK-NEXT: 1 3 0.50 fcvt z0.d, p0/m, z0.h
+# CHECK-NEXT: 1 3 0.50 fcvt z0.d, p0/m, z0.s
+# CHECK-NEXT: 1 3 0.50 fcvt z0.h, p0/m, z0.d
+# CHECK-NEXT: 2 4 1.00 fcvt z0.h, p0/m, z0.s
+# CHECK-NEXT: 1 3 0.50 fcvt z0.s, p0/m, z0.d
+# CHECK-NEXT: 2 4 1.00 fcvt z0.s, p0/m, z0.h
+# CHECK-NEXT: 2 4 1.00 fcvtlt z0.s, p0/m, z1.h
+# CHECK-NEXT: 1 3 0.50 fcvtlt z30.d, p7/m, z31.s
+# CHECK-NEXT: 2 4 1.00 fcvtnt z0.h, p0/m, z1.s
+# CHECK-NEXT: 1 3 0.50 fcvtnt z30.s, p7/m, z31.d
+# CHECK-NEXT: 1 3 0.50 fcvtx z0.s, p0/m, z0.d
+# CHECK-NEXT: 1 3 0.50 fcvtx z30.s, p7/m, z31.d
+# CHECK-NEXT: 1 3 0.50 fcvtxnt z0.s, p0/m, z1.d
+# CHECK-NEXT: 1 3 0.50 fcvtxnt z30.s, p7/m, z31.d
+# CHECK-NEXT: 1 3 0.50 fcvtzs z0.d, p0/m, z0.d
+# CHECK-NEXT: 1 3 0.50 fcvtzs z0.d, p0/m, z0.h
+# CHECK-NEXT: 1 3 0.50 fcvtzs z0.d, p0/m, z0.s
+# CHECK-NEXT: 4 6 2.00 fcvtzs z0.h, p0/m, z0.h
+# CHECK-NEXT: 1 3 0.50 fcvtzs z0.s, p0/m, z0.d
+# CHECK-NEXT: 2 4 1.00 fcvtzs z0.s, p0/m, z0.h
+# CHECK-NEXT: 2 4 1.00 fcvtzs z0.s, p0/m, z0.s
+# CHECK-NEXT: 1 3 0.50 fcvtzu z0.d, p0/m, z0.d
+# CHECK-NEXT: 1 3 0.50 fcvtzu z0.d, p0/m, z0.h
+# CHECK-NEXT: 1 3 0.50 fcvtzu z0.d, p0/m, z0.s
+# CHECK-NEXT: 4 6 2.00 fcvtzu z0.h, p0/m, z0.h
+# CHECK-NEXT: 1 3 0.50 fcvtzu z0.s, p0/m, z0.d
+# CHECK-NEXT: 2 4 1.00 fcvtzu z0.s, p0/m, z0.h
+# CHECK-NEXT: 2 4 1.00 fcvtzu z0.s, p0/m, z0.s
+# CHECK-NEXT: 1 14 2.00 fdiv z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 13 8.00 fdiv z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 1 11 4.00 fdiv z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 14 2.00 fdivr z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 13 8.00 fdivr z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 1 11 4.00 fdivr z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 3 1.00 fexpa z0.d, z31.d
+# CHECK-NEXT: 1 3 1.00 fexpa z0.h, z31.h
+# CHECK-NEXT: 1 3 1.00 fexpa z0.s, z31.s
+# CHECK-NEXT: 1 3 0.50 flogb z31.d, p7/m, z31.d
+# CHECK-NEXT: 4 6 2.00 flogb z31.h, p7/m, z31.h
+# CHECK-NEXT: 2 4 1.00 flogb z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 4 0.25 fmad z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: 1 4 0.25 fmad z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.25 fmad z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: 1 2 0.25 fmax z0.d, p0/m, z0.d, #0.0
+# CHECK-NEXT: 1 2 0.25 fmax z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 2 0.25 fmax z0.h, p0/m, z0.h, #0.0
+# CHECK-NEXT: 1 2 0.25 fmax z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 1 2 0.25 fmax z0.s, p0/m, z0.s, #0.0
+# CHECK-NEXT: 1 2 0.25 fmax z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 2 0.25 fmax z31.d, p7/m, z31.d, #1.0
+# CHECK-NEXT: 1 2 0.25 fmax z31.h, p7/m, z31.h, #1.0
+# CHECK-NEXT: 1 2 0.25 fmax z31.s, p7/m, z31.s, #1.0
+# CHECK-NEXT: 1 2 0.25 fmaxnm z0.d, p0/m, z0.d, #0.0
+# CHECK-NEXT: 1 2 0.25 fmaxnm z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 2 0.25 fmaxnm z0.h, p0/m, z0.h, #0.0
+# CHECK-NEXT: 1 2 0.25 fmaxnm z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 1 2 0.25 fmaxnm z0.s, p0/m, z0.s, #0.0
+# CHECK-NEXT: 1 2 0.25 fmaxnm z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 2 0.25 fmaxnm z31.d, p7/m, z31.d, #1.0
+# CHECK-NEXT: 1 2 0.25 fmaxnm z31.h, p7/m, z31.h, #1.0
+# CHECK-NEXT: 1 2 0.25 fmaxnm z31.s, p7/m, z31.s, #1.0
+# CHECK-NEXT: 1 2 0.25 fmaxnmp z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.25 fmaxnmp z29.s, p3/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.25 fmaxnmp z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 2 4 0.50 fmaxnmv d0, p7, z31.d
+# CHECK-NEXT: 4 8 1.00 fmaxnmv h0, p7, z31.h
+# CHECK-NEXT: 3 6 0.75 fmaxnmv s0, p7, z31.s
+# CHECK-NEXT: 1 2 0.25 fmaxp z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.25 fmaxp z29.s, p3/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.25 fmaxp z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 2 4 0.50 fmaxv d0, p7, z31.d
+# CHECK-NEXT: 4 8 1.00 fmaxv h0, p7, z31.h
+# CHECK-NEXT: 3 6 0.75 fmaxv s0, p7, z31.s
+# CHECK-NEXT: 1 2 0.25 fmin z0.d, p0/m, z0.d, #0.0
+# CHECK-NEXT: 1 2 0.25 fmin z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 2 0.25 fmin z0.h, p0/m, z0.h, #0.0
+# CHECK-NEXT: 1 2 0.25 fmin z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 1 2 0.25 fmin z0.s, p0/m, z0.s, #0.0
+# CHECK-NEXT: 1 2 0.25 fmin z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 2 0.25 fmin z31.d, p7/m, z31.d, #1.0
+# CHECK-NEXT: 1 2 0.25 fmin z31.h, p7/m, z31.h, #1.0
+# CHECK-NEXT: 1 2 0.25 fmin z31.s, p7/m, z31.s, #1.0
+# CHECK-NEXT: 1 2 0.25 fminnm z0.d, p0/m, z0.d, #0.0
+# CHECK-NEXT: 1 2 0.25 fminnm z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 2 0.25 fminnm z0.h, p0/m, z0.h, #0.0
+# CHECK-NEXT: 1 2 0.25 fminnm z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 1 2 0.25 fminnm z0.s, p0/m, z0.s, #0.0
+# CHECK-NEXT: 1 2 0.25 fminnm z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 2 0.25 fminnm z31.d, p7/m, z31.d, #1.0
+# CHECK-NEXT: 1 2 0.25 fminnm z31.h, p7/m, z31.h, #1.0
+# CHECK-NEXT: 1 2 0.25 fminnm z31.s, p7/m, z31.s, #1.0
+# CHECK-NEXT: 1 2 0.25 fminnmp z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.25 fminnmp z29.s, p3/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.25 fminnmp z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 2 4 0.50 fminnmv d0, p7, z31.d
+# CHECK-NEXT: 4 8 1.00 fminnmv h0, p7, z31.h
+# CHECK-NEXT: 3 6 0.75 fminnmv s0, p7, z31.s
+# CHECK-NEXT: 1 2 0.25 fminp z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.25 fminp z29.s, p3/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.25 fminp z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 2 4 0.50 fminv d0, p7, z31.d
+# CHECK-NEXT: 4 8 1.00 fminv h0, p7, z31.h
+# CHECK-NEXT: 3 6 0.75 fminv s0, p7, z31.s
+# CHECK-NEXT: 1 4 0.25 fmla z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: 1 4 0.25 fmla z0.d, z1.d, z7.d[1]
+# CHECK-NEXT: 1 4 0.25 fmla z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.25 fmla z0.h, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 0.25 fmla z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: 1 4 0.25 fmla z0.s, z1.s, z7.s[3]
+# CHECK-NEXT: 1 4 0.25 fmlalb z0.s, z1.h, z7.h[0]
+# CHECK-NEXT: 1 4 0.25 fmlalb z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 4 0.25 fmlalb z30.s, z31.h, z7.h[7]
+# CHECK-NEXT: 1 4 0.25 fmlalt z0.s, z1.h, z7.h[0]
+# CHECK-NEXT: 1 4 0.25 fmlalt z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 4 0.25 fmlalt z30.s, z31.h, z7.h[7]
+# CHECK-NEXT: 1 4 0.25 fmls z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: 1 4 0.25 fmls z0.d, z1.d, z7.d[1]
+# CHECK-NEXT: 1 4 0.25 fmls z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.25 fmls z0.h, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 0.25 fmls z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: 1 4 0.25 fmls z0.s, z1.s, z7.s[3]
+# CHECK-NEXT: 1 4 0.25 fmlslb z0.s, z1.h, z7.h[0]
+# CHECK-NEXT: 1 4 0.25 fmlslb z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 4 0.25 fmlslb z30.s, z31.h, z7.h[7]
+# CHECK-NEXT: 1 4 0.25 fmlslt z0.s, z1.h, z7.h[0]
+# CHECK-NEXT: 1 4 0.25 fmlslt z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 4 0.25 fmlslt z30.s, z31.h, z7.h[7]
+# CHECK-NEXT: 1 2 0.25 fmov z0.d, #-10.00000000
+# CHECK-NEXT: 1 2 0.25 fmov z0.d, #0.12500000
+# CHECK-NEXT: 1 2 0.25 fmov z0.d, p0/m, #-10.00000000
+# CHECK-NEXT: 1 2 0.25 fmov z0.d, p0/m, #0.12500000
+# CHECK-NEXT: 1 2 0.25 fmov z0.h, #-0.12500000
+# CHECK-NEXT: 1 2 0.25 fmov z0.h, p0/m, #-0.12500000
+# CHECK-NEXT: 1 2 0.25 fmov z0.s, #-0.12500000
+# CHECK-NEXT: 1 2 0.25 fmov z0.s, p0/m, #-0.12500000
+# CHECK-NEXT: 1 4 0.25 fmsb z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: 1 4 0.25 fmsb z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.25 fmsb z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: 1 3 0.25 fmul z0.d, p0/m, z0.d, #0.5
+# CHECK-NEXT: 1 3 0.25 fmul z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 3 0.25 fmul z0.d, z0.d, z0.d[0]
+# CHECK-NEXT: 1 3 0.25 fmul z0.d, z1.d, z31.d
+# CHECK-NEXT: 1 3 0.25 fmul z0.h, p0/m, z0.h, #0.5
+# CHECK-NEXT: 1 3 0.25 fmul z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 1 3 0.25 fmul z0.h, z0.h, z0.h[0]
+# CHECK-NEXT: 1 3 0.25 fmul z0.h, z1.h, z31.h
+# CHECK-NEXT: 1 3 0.25 fmul z0.s, p0/m, z0.s, #0.5
+# CHECK-NEXT: 1 3 0.25 fmul z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 3 0.25 fmul z0.s, z0.s, z0.s[0]
+# CHECK-NEXT: 1 3 0.25 fmul z0.s, z1.s, z31.s
+# CHECK-NEXT: 1 3 0.25 fmul z31.d, p7/m, z31.d, #2.0
+# CHECK-NEXT: 1 3 0.25 fmul z31.d, z31.d, z15.d[1]
+# CHECK-NEXT: 1 3 0.25 fmul z31.h, p7/m, z31.h, #2.0
+# CHECK-NEXT: 1 3 0.25 fmul z31.h, z31.h, z7.h[7]
+# CHECK-NEXT: 1 3 0.25 fmul z31.s, p7/m, z31.s, #2.0
+# CHECK-NEXT: 1 3 0.25 fmul z31.s, z31.s, z7.s[3]
+# CHECK-NEXT: 1 3 0.25 fmulx z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 3 0.25 fmulx z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 1 3 0.25 fmulx z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 2 0.25 fneg z31.d, p7/m, z31.d
+# CHECK-NEXT: 1 2 0.25 fneg z31.h, p7/m, z31.h
+# CHECK-NEXT: 1 2 0.25 fneg z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 4 0.25 fnmad z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: 1 4 0.25 fnmad z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.25 fnmad z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: 1 4 0.25 fnmla z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: 1 4 0.25 fnmla z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.25 fnmla z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: 1 4 0.25 fnmls z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: 1 4 0.25 fnmls z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.25 fnmls z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: 1 4 0.25 fnmsb z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: 1 4 0.25 fnmsb z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.25 fnmsb z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: 1 3 0.50 frecpe z0.d, z31.d
+# CHECK-NEXT: 4 6 2.00 frecpe z0.h, z31.h
+# CHECK-NEXT: 2 4 1.00 frecpe z0.s, z31.s
+# CHECK-NEXT: 1 4 0.25 frecps z0.d, z1.d, z31.d
+# CHECK-NEXT: 1 4 0.25 frecps z0.h, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.25 frecps z0.s, z1.s, z31.s
+# CHECK-NEXT: 1 3 0.50 frecpx z31.d, p7/m, z31.d
+# CHECK-NEXT: 4 6 2.00 frecpx z31.h, p7/m, z31.h
+# CHECK-NEXT: 2 4 1.00 frecpx z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 3 0.50 frinta z31.d, p7/m, z31.d
+# CHECK-NEXT: 4 6 2.00 frinta z31.h, p7/m, z31.h
+# CHECK-NEXT: 2 4 1.00 frinta z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 3 0.50 frinti z31.d, p7/m, z31.d
+# CHECK-NEXT: 4 6 2.00 frinti z31.h, p7/m, z31.h
+# CHECK-NEXT: 2 4 1.00 frinti z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 3 0.50 frintm z31.d, p7/m, z31.d
+# CHECK-NEXT: 4 6 2.00 frintm z31.h, p7/m, z31.h
+# CHECK-NEXT: 2 4 1.00 frintm z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 3 0.50 frintn z31.d, p7/m, z31.d
+# CHECK-NEXT: 4 6 2.00 frintn z31.h, p7/m, z31.h
+# CHECK-NEXT: 2 4 1.00 frintn z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 3 0.50 frintp z31.d, p7/m, z31.d
+# CHECK-NEXT: 4 6 2.00 frintp z31.h, p7/m, z31.h
+# CHECK-NEXT: 2 4 1.00 frintp z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 3 0.50 frintx z31.d, p7/m, z31.d
+# CHECK-NEXT: 4 6 2.00 frintx z31.h, p7/m, z31.h
+# CHECK-NEXT: 2 4 1.00 frintx z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 3 0.50 frintz z31.d, p7/m, z31.d
+# CHECK-NEXT: 4 6 2.00 frintz z31.h, p7/m, z31.h
+# CHECK-NEXT: 2 4 1.00 frintz z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 3 0.50 frsqrte z0.d, z31.d
+# CHECK-NEXT: 4 6 2.00 frsqrte z0.h, z31.h
+# CHECK-NEXT: 2 4 1.00 frsqrte z0.s, z31.s
+# CHECK-NEXT: 1 4 0.25 frsqrts z0.d, z1.d, z31.d
+# CHECK-NEXT: 1 4 0.25 frsqrts z0.h, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.25 frsqrts z0.s, z1.s, z31.s
+# CHECK-NEXT: 1 3 0.25 fscale z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 3 0.25 fscale z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 1 3 0.25 fscale z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 14 2.00 fsqrt z31.d, p7/m, z31.d
+# CHECK-NEXT: 1 13 8.00 fsqrt z31.h, p7/m, z31.h
+# CHECK-NEXT: 1 11 4.00 fsqrt z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 2 0.25 fsub z0.d, p0/m, z0.d, #0.5
+# CHECK-NEXT: 1 2 0.25 fsub z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 2 0.25 fsub z0.d, z1.d, z31.d
+# CHECK-NEXT: 1 2 0.25 fsub z0.h, p0/m, z0.h, #0.5
+# CHECK-NEXT: 1 2 0.25 fsub z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 1 2 0.25 fsub z0.h, z1.h, z31.h
+# CHECK-NEXT: 1 2 0.25 fsub z0.s, p0/m, z0.s, #0.5
+# CHECK-NEXT: 1 2 0.25 fsub z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 2 0.25 fsub z0.s, z1.s, z31.s
+# CHECK-NEXT: 1 2 0.25 fsub z31.d, p7/m, z31.d, #1.0
+# CHECK-NEXT: 1 2 0.25 fsub z31.h, p7/m, z31.h, #1.0
+# CHECK-NEXT: 1 2 0.25 fsub z31.s, p7/m, z31.s, #1.0
+# CHECK-NEXT: 1 2 0.25 fsubr z0.d, p0/m, z0.d, #0.5
+# CHECK-NEXT: 1 2 0.25 fsubr z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 2 0.25 fsubr z0.h, p0/m, z0.h, #0.5
+# CHECK-NEXT: 1 2 0.25 fsubr z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 1 2 0.25 fsubr z0.s, p0/m, z0.s, #0.5
+# CHECK-NEXT: 1 2 0.25 fsubr z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 2 0.25 fsubr z31.d, p7/m, z31.d, #1.0
+# CHECK-NEXT: 1 2 0.25 fsubr z31.h, p7/m, z31.h, #1.0
+# CHECK-NEXT: 1 2 0.25 fsubr z31.s, p7/m, z31.s, #1.0
+# CHECK-NEXT: 1 4 0.25 ftmad z0.d, z0.d, z31.d, #7
+# CHECK-NEXT: 1 4 0.25 ftmad z0.h, z0.h, z31.h, #7
+# CHECK-NEXT: 1 4 0.25 ftmad z0.s, z0.s, z31.s, #7
+# CHECK-NEXT: 1 3 0.25 ftsmul z0.d, z1.d, z31.d
+# CHECK-NEXT: 1 3 0.25 ftsmul z0.h, z1.h, z31.h
+# CHECK-NEXT: 1 3 0.25 ftsmul z0.s, z1.s, z31.s
+# CHECK-NEXT: 1 3 0.25 ftssel z0.d, z1.d, z31.d
+# CHECK-NEXT: 1 3 0.25 ftssel z0.h, z1.h, z31.h
+# CHECK-NEXT: 1 3 0.25 ftssel z0.s, z1.s, z31.s
+# CHECK-NEXT: 1 2 0.25 histcnt z0.s, p0/z, z1.s, z2.s
+# CHECK-NEXT: 1 2 0.25 histcnt z29.d, p7/z, z30.d, z31.d
+# CHECK-NEXT: 1 2 0.25 histseg z0.b, z1.b, z31.b
+# CHECK-NEXT: 1 1 0.13 incb x0
+# CHECK-NEXT: 1 2 0.50 incb x0, #14
+# CHECK-NEXT: 1 2 0.50 incb x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 incb x0, pow2
+# CHECK-NEXT: 1 2 0.50 incb x0, vl1
+# CHECK-NEXT: 1 1 0.13 incd x0
+# CHECK-NEXT: 1 2 0.50 incd x0, #14
+# CHECK-NEXT: 1 2 0.50 incd x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 incd x0, pow2
+# CHECK-NEXT: 1 2 0.50 incd x0, vl1
+# CHECK-NEXT: 1 2 0.25 incd z0.d
+# CHECK-NEXT: 1 2 0.25 incd z0.d, all, mul #16
+# CHECK-NEXT: 1 1 0.13 inch x0
+# CHECK-NEXT: 1 2 0.50 inch x0, #14
+# CHECK-NEXT: 1 2 0.50 inch x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 inch x0, pow2
+# CHECK-NEXT: 1 2 0.50 inch x0, vl1
+# CHECK-NEXT: 1 2 0.25 inch z0.h
+# CHECK-NEXT: 1 2 0.25 inch z0.h, all, mul #16
+# CHECK-NEXT: 1 2 0.50 incp x0, p0.b
+# CHECK-NEXT: 1 2 0.50 incp x0, p0.d
+# CHECK-NEXT: 1 2 0.50 incp x0, p0.h
+# CHECK-NEXT: 1 2 0.50 incp x0, p0.s
+# CHECK-NEXT: 1 2 0.50 incp xzr, p15.b
+# CHECK-NEXT: 1 2 0.50 incp xzr, p15.d
+# CHECK-NEXT: 1 2 0.50 incp xzr, p15.h
+# CHECK-NEXT: 1 2 0.50 incp xzr, p15.s
+# CHECK-NEXT: 3 7 1.00 incp z31.d, p15.d
+# CHECK-NEXT: 3 7 1.00 incp z31.h, p15.h
+# CHECK-NEXT: 3 7 1.00 incp z31.s, p15.s
+# CHECK-NEXT: 1 1 0.13 incw x0
+# CHECK-NEXT: 1 2 0.50 incw x0, #14
+# CHECK-NEXT: 1 2 0.50 incw x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 incw x0, pow2
+# CHECK-NEXT: 1 2 0.50 incw x0, vl1
+# CHECK-NEXT: 1 2 0.25 incw z0.s
+# CHECK-NEXT: 1 2 0.25 incw z0.s, all, mul #16
+# CHECK-NEXT: 1 4 0.50 index z0.b, #0, #0
+# CHECK-NEXT: 2 5 1.00 index z0.d, #0, #0
+# CHECK-NEXT: 1 4 0.50 index z0.h, #0, #0
+# CHECK-NEXT: 2 7 1.00 index z0.h, w0, w0
+# CHECK-NEXT: 1 4 0.50 index z0.s, #0, #0
+# CHECK-NEXT: 2 7 1.00 index z21.b, w10, w21
+# CHECK-NEXT: 4 8 2.00 index z21.d, x10, x21
+# CHECK-NEXT: 2 7 1.00 index z21.s, w10, w21
+# CHECK-NEXT: 2 7 1.00 index z23.b, #13, w8
+# CHECK-NEXT: 2 7 1.00 index z23.b, w13, #8
+# CHECK-NEXT: 4 8 2.00 index z23.d, #13, x8
+# CHECK-NEXT: 4 8 2.00 index z23.d, x13, #8
+# CHECK-NEXT: 2 7 1.00 index z23.h, #13, w8
+# CHECK-NEXT: 2 7 1.00 index z23.h, w13, #8
+# CHECK-NEXT: 2 7 1.00 index z23.s, #13, w8
+# CHECK-NEXT: 2 7 1.00 index z23.s, w13, #8
+# CHECK-NEXT: 1 4 0.50 index z31.b, #-1, #-1
+# CHECK-NEXT: 2 7 1.00 index z31.b, #-1, wzr
+# CHECK-NEXT: 2 7 1.00 index z31.b, wzr, #-1
+# CHECK-NEXT: 2 7 1.00 index z31.b, wzr, wzr
+# CHECK-NEXT: 2 5 1.00 index z31.d, #-1, #-1
+# CHECK-NEXT: 4 8 2.00 index z31.d, #-1, xzr
+# CHECK-NEXT: 4 8 2.00 index z31.d, xzr, #-1
+# CHECK-NEXT: 4 8 2.00 index z31.d, xzr, xzr
+# CHECK-NEXT: 1 4 0.50 index z31.h, #-1, #-1
+# CHECK-NEXT: 2 7 1.00 index z31.h, #-1, wzr
+# CHECK-NEXT: 2 7 1.00 index z31.h, wzr, #-1
+# CHECK-NEXT: 2 7 1.00 index z31.h, wzr, wzr
+# CHECK-NEXT: 1 4 0.50 index z31.s, #-1, #-1
+# CHECK-NEXT: 2 7 1.00 index z31.s, #-1, wzr
+# CHECK-NEXT: 2 7 1.00 index z31.s, wzr, #-1
+# CHECK-NEXT: 2 7 1.00 index z31.s, wzr, wzr
+# CHECK-NEXT: 2 5 1.00 insr z0.b, w0
+# CHECK-NEXT: 2 5 1.00 insr z0.d, x0
+# CHECK-NEXT: 2 5 1.00 insr z0.h, w0
+# CHECK-NEXT: 2 5 1.00 insr z0.s, w0
+# CHECK-NEXT: 1 2 0.25 insr z31.b, b31
+# CHECK-NEXT: 2 5 1.00 insr z31.b, wzr
+# CHECK-NEXT: 1 2 0.25 insr z31.d, d31
+# CHECK-NEXT: 2 5 1.00 insr z31.d, xzr
+# CHECK-NEXT: 1 2 0.25 insr z31.h, h31
+# CHECK-NEXT: 2 5 1.00 insr z31.h, wzr
+# CHECK-NEXT: 1 2 0.25 insr z31.s, s31
+# CHECK-NEXT: 2 5 1.00 insr z31.s, wzr
+# CHECK-NEXT: 1 3 1.00 lasta b0, p7, z31.b
+# CHECK-NEXT: 1 3 1.00 lasta d0, p7, z31.d
+# CHECK-NEXT: 1 3 1.00 lasta h0, p7, z31.h
+# CHECK-NEXT: 1 3 1.00 lasta s0, p7, z31.s
+# CHECK-NEXT: 2 6 1.00 lasta w0, p7, z31.b
+# CHECK-NEXT: 2 6 1.00 lasta w0, p7, z31.h
+# CHECK-NEXT: 2 6 1.00 lasta w0, p7, z31.s
+# CHECK-NEXT: 2 6 1.00 lasta x0, p7, z31.d
+# CHECK-NEXT: 1 3 1.00 lastb b0, p7, z31.b
+# CHECK-NEXT: 1 3 1.00 lastb d0, p7, z31.d
+# CHECK-NEXT: 1 3 1.00 lastb h0, p7, z31.h
+# CHECK-NEXT: 1 3 1.00 lastb s0, p7, z31.s
+# CHECK-NEXT: 2 6 1.00 lastb w0, p7, z31.b
+# CHECK-NEXT: 2 6 1.00 lastb w0, p7, z31.h
+# CHECK-NEXT: 2 6 1.00 lastb w0, p7, z31.s
+# CHECK-NEXT: 2 6 1.00 lastb x0, p7, z31.d
+# CHECK-NEXT: 1 6 0.33 * ld1b { z0.b }, p0/z, [sp, x0]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z0.b }, p0/z, [x0, x0]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z0.b }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 5 9 1.00 * ld1b { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z0.h }, p0/z, [x0]
+# CHECK-NEXT: 5 9 1.00 * ld1b { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: 5 9 1.00 * ld1b { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 5 9 1.00 * ld1b { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z21.b }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 3 9 0.50 * ld1b { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 3 9 0.50 * ld1b { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z21.h }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z21.s }, p5/z, [x10, x21]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z23.d }, p3/z, [x13, x8]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z31.b }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 3 9 0.50 * ld1b { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 5 9 1.00 * ld1b { z31.d }, p7/z, [z31.d, #31]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z31.h }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 5 9 1.00 * ld1b { z31.s }, p7/z, [z31.s, #31]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z5.h }, p3/z, [x17, x16]
+# CHECK-NEXT: 5 10 1.00 * ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
+# CHECK-NEXT: 5 10 1.00 * ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
+# CHECK-NEXT: 1 6 0.33 * ld1d { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 5 9 1.00 * ld1d { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 1 6 0.33 * ld1d { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 3 9 0.50 * ld1d { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 3 9 0.50 * ld1d { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 1 6 0.33 * ld1d { z23.d }, p3/z, [sp, x8, lsl #3]
+# CHECK-NEXT: 1 6 0.33 * ld1d { z23.d }, p3/z, [x13, x8, lsl #3]
+# CHECK-NEXT: 5 10 1.00 * ld1d { z23.d }, p3/z, [x13, z8.d, lsl #3]
+# CHECK-NEXT: 1 6 0.33 * ld1d { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 3 9 0.50 * ld1d { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 5 9 1.00 * ld1d { z31.d }, p7/z, [z31.d, #248]
+# CHECK-NEXT: 5 10 1.00 * ld1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
+# CHECK-NEXT: 5 10 1.00 * ld1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
+# CHECK-NEXT: 1 6 0.33 * ld1h { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 5 9 1.00 * ld1h { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 1 6 0.33 * ld1h { z0.h }, p0/z, [x0]
+# CHECK-NEXT: 5 9 1.00 * ld1h { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: 5 9 1.00 * ld1h { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: 1 6 0.33 * ld1h { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 5 9 1.00 * ld1h { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: 1 6 0.33 * ld1h { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 3 9 0.50 * ld1h { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 3 9 0.50 * ld1h { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 1 6 0.33 * ld1h { z21.h }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ld1h { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ld1h { z21.s }, p5/z, [x10, x21, lsl #1]
+# CHECK-NEXT: 1 6 0.33 * ld1h { z23.d }, p3/z, [x13, x8, lsl #1]
+# CHECK-NEXT: 5 10 1.00 * ld1h { z23.d }, p3/z, [x13, z8.d, lsl #1]
+# CHECK-NEXT: 1 6 0.33 * ld1h { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 3 9 0.50 * ld1h { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 5 9 1.00 * ld1h { z31.d }, p7/z, [z31.d, #62]
+# CHECK-NEXT: 1 6 0.33 * ld1h { z31.h }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ld1h { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 9 10 2.00 * ld1h { z31.s }, p7/z, [sp, z31.s, sxtw #1]
+# CHECK-NEXT: 9 10 2.00 * ld1h { z31.s }, p7/z, [sp, z31.s, uxtw #1]
+# CHECK-NEXT: 5 9 1.00 * ld1h { z31.s }, p7/z, [z31.s, #62]
+# CHECK-NEXT: 1 6 0.33 * ld1h { z5.h }, p3/z, [sp, x16, lsl #1]
+# CHECK-NEXT: 1 6 0.33 * ld1h { z5.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT: 1 6 0.33 * ld1rb { z0.b }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1rb { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1rb { z0.h }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1rb { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1rb { z31.b }, p7/z, [sp, #63]
+# CHECK-NEXT: 1 6 0.33 * ld1rb { z31.d }, p7/z, [sp, #63]
+# CHECK-NEXT: 1 6 0.33 * ld1rb { z31.h }, p7/z, [sp, #63]
+# CHECK-NEXT: 1 6 0.33 * ld1rb { z31.s }, p7/z, [sp, #63]
+# CHECK-NEXT: 1 6 0.33 * ld1rd { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1rd { z31.d }, p7/z, [sp, #504]
+# CHECK-NEXT: 1 6 0.33 * ld1rh { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1rh { z0.h }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1rh { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1rh { z31.d }, p7/z, [sp, #126]
+# CHECK-NEXT: 1 6 0.33 * ld1rh { z31.h }, p7/z, [sp, #126]
+# CHECK-NEXT: 1 6 0.33 * ld1rh { z31.s }, p7/z, [sp, #126]
+# CHECK-NEXT: 1 6 0.33 * ld1rqb { z0.b }, p0/z, [x0, x0]
+# CHECK-NEXT: 1 6 0.33 * ld1rqb { z0.b }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1rqb { z21.b }, p5/z, [x10, #112]
+# CHECK-NEXT: 1 6 0.33 * ld1rqb { z23.b }, p3/z, [x13, #-128]
+# CHECK-NEXT: 1 6 0.33 * ld1rqb { z31.b }, p7/z, [sp, #-16]
+# CHECK-NEXT: 1 6 0.33 * ld1rqd { z0.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: 1 6 0.33 * ld1rqd { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1rqd { z23.d }, p3/z, [x13, #-128]
+# CHECK-NEXT: 1 6 0.33 * ld1rqd { z23.d }, p3/z, [x13, #112]
+# CHECK-NEXT: 1 6 0.33 * ld1rqd { z31.d }, p7/z, [sp, #-16]
+# CHECK-NEXT: 1 6 0.33 * ld1rqh { z0.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: 1 6 0.33 * ld1rqh { z0.h }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1rqh { z23.h }, p3/z, [x13, #-128]
+# CHECK-NEXT: 1 6 0.33 * ld1rqh { z23.h }, p3/z, [x13, #112]
+# CHECK-NEXT: 1 6 0.33 * ld1rqh { z31.h }, p7/z, [sp, #-16]
+# CHECK-NEXT: 1 6 0.33 * ld1rqw { z0.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: 1 6 0.33 * ld1rqw { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1rqw { z23.s }, p3/z, [x13, #-128]
+# CHECK-NEXT: 1 6 0.33 * ld1rqw { z23.s }, p3/z, [x13, #112]
+# CHECK-NEXT: 1 6 0.33 * ld1rqw { z31.s }, p7/z, [sp, #-16]
+# CHECK-NEXT: 1 6 0.33 * ld1rsb { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1rsb { z0.h }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1rsb { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1rsb { z31.d }, p7/z, [sp, #63]
+# CHECK-NEXT: 1 6 0.33 * ld1rsb { z31.h }, p7/z, [sp, #63]
+# CHECK-NEXT: 1 6 0.33 * ld1rsb { z31.s }, p7/z, [sp, #63]
+# CHECK-NEXT: 1 6 0.33 * ld1rsh { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1rsh { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1rsh { z31.d }, p7/z, [sp, #126]
+# CHECK-NEXT: 1 6 0.33 * ld1rsh { z31.s }, p7/z, [sp, #126]
+# CHECK-NEXT: 1 6 0.33 * ld1rsw { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1rsw { z31.d }, p7/z, [sp, #252]
+# CHECK-NEXT: 1 6 0.33 * ld1rw { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1rw { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1rw { z31.d }, p7/z, [sp, #252]
+# CHECK-NEXT: 1 6 0.33 * ld1rw { z31.s }, p7/z, [sp, #252]
+# CHECK-NEXT: 1 6 0.33 * ld1sb { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 5 9 1.00 * ld1sb { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 1 6 0.33 * ld1sb { z0.h }, p0/z, [sp, x0]
+# CHECK-NEXT: 1 6 0.33 * ld1sb { z0.h }, p0/z, [x0, x0]
+# CHECK-NEXT: 1 6 0.33 * ld1sb { z0.h }, p0/z, [x0]
+# CHECK-NEXT: 5 9 1.00 * ld1sb { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: 1 6 0.33 * ld1sb { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 5 9 1.00 * ld1sb { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: 1 6 0.33 * ld1sb { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 3 9 0.50 * ld1sb { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 3 9 0.50 * ld1sb { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 1 6 0.33 * ld1sb { z21.h }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ld1sb { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ld1sb { z21.s }, p5/z, [x10, x21]
+# CHECK-NEXT: 1 6 0.33 * ld1sb { z23.d }, p3/z, [x13, x8]
+# CHECK-NEXT: 1 6 0.33 * ld1sb { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 3 9 0.50 * ld1sb { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 5 9 1.00 * ld1sb { z31.d }, p7/z, [z31.d, #31]
+# CHECK-NEXT: 1 6 0.33 * ld1sb { z31.h }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ld1sb { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 5 9 1.00 * ld1sb { z31.s }, p7/z, [z31.s, #31]
+# CHECK-NEXT: 5 10 1.00 * ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1]
+# CHECK-NEXT: 5 10 1.00 * ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
+# CHECK-NEXT: 1 6 0.33 * ld1sh { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 5 9 1.00 * ld1sh { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 5 9 1.00 * ld1sh { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: 5 9 1.00 * ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: 1 6 0.33 * ld1sh { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 5 9 1.00 * ld1sh { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: 1 6 0.33 * ld1sh { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 3 9 0.50 * ld1sh { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 3 9 0.50 * ld1sh { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 1 6 0.33 * ld1sh { z21.s }, p5/z, [sp, x21, lsl #1]
+# CHECK-NEXT: 1 6 0.33 * ld1sh { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ld1sh { z21.s }, p5/z, [x10, x21, lsl #1]
+# CHECK-NEXT: 1 6 0.33 * ld1sh { z23.d }, p3/z, [x13, x8, lsl #1]
+# CHECK-NEXT: 5 10 1.00 * ld1sh { z23.d }, p3/z, [x13, z8.d, lsl #1]
+# CHECK-NEXT: 1 6 0.33 * ld1sh { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 3 9 0.50 * ld1sh { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 5 9 1.00 * ld1sh { z31.d }, p7/z, [z31.d, #62]
+# CHECK-NEXT: 1 6 0.33 * ld1sh { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 9 10 2.00 * ld1sh { z31.s }, p7/z, [sp, z31.s, sxtw #1]
+# CHECK-NEXT: 9 10 2.00 * ld1sh { z31.s }, p7/z, [sp, z31.s, uxtw #1]
+# CHECK-NEXT: 5 9 1.00 * ld1sh { z31.s }, p7/z, [z31.s, #62]
+# CHECK-NEXT: 5 10 1.00 * ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2]
+# CHECK-NEXT: 5 10 1.00 * ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2]
+# CHECK-NEXT: 1 6 0.33 * ld1sw { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 5 9 1.00 * ld1sw { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 1 6 0.33 * ld1sw { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 3 9 0.50 * ld1sw { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 3 9 0.50 * ld1sw { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 1 6 0.33 * ld1sw { z23.d }, p3/z, [sp, x8, lsl #2]
+# CHECK-NEXT: 1 6 0.33 * ld1sw { z23.d }, p3/z, [x13, x8, lsl #2]
+# CHECK-NEXT: 5 10 1.00 * ld1sw { z23.d }, p3/z, [x13, z8.d, lsl #2]
+# CHECK-NEXT: 1 6 0.33 * ld1sw { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 3 9 0.50 * ld1sw { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 5 9 1.00 * ld1sw { z31.d }, p7/z, [z31.d, #124]
+# CHECK-NEXT: 5 10 1.00 * ld1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
+# CHECK-NEXT: 5 10 1.00 * ld1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
+# CHECK-NEXT: 1 6 0.33 * ld1w { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 5 9 1.00 * ld1w { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 5 9 1.00 * ld1w { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: 5 9 1.00 * ld1w { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: 1 6 0.33 * ld1w { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 5 9 1.00 * ld1w { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: 1 6 0.33 * ld1w { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 3 9 0.50 * ld1w { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 3 9 0.50 * ld1w { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 1 6 0.33 * ld1w { z21.s }, p5/z, [sp, x21, lsl #2]
+# CHECK-NEXT: 1 6 0.33 * ld1w { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ld1w { z21.s }, p5/z, [x10, x21, lsl #2]
+# CHECK-NEXT: 1 6 0.33 * ld1w { z23.d }, p3/z, [x13, x8, lsl #2]
+# CHECK-NEXT: 5 10 1.00 * ld1w { z23.d }, p3/z, [x13, z8.d, lsl #2]
+# CHECK-NEXT: 1 6 0.33 * ld1w { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 3 9 0.50 * ld1w { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 5 9 1.00 * ld1w { z31.d }, p7/z, [z31.d, #124]
+# CHECK-NEXT: 1 6 0.33 * ld1w { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 9 10 2.00 * ld1w { z31.s }, p7/z, [sp, z31.s, sxtw #2]
+# CHECK-NEXT: 9 10 2.00 * ld1w { z31.s }, p7/z, [sp, z31.s, uxtw #2]
+# CHECK-NEXT: 5 9 1.00 * ld1w { z31.s }, p7/z, [z31.s, #124]
+# CHECK-NEXT: 6 9 0.67 * ld2b { z0.b, z1.b }, p0/z, [x0, x0]
+# CHECK-NEXT: 4 8 0.67 * ld2b { z0.b, z1.b }, p0/z, [x0]
+# CHECK-NEXT: 4 8 0.67 * ld2b { z21.b, z22.b }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT: 4 8 0.67 * ld2b { z23.b, z24.b }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT: 6 9 0.67 * ld2b { z5.b, z6.b }, p3/z, [x17, x16]
+# CHECK-NEXT: 6 9 0.67 * ld2d { z0.d, z1.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: 4 8 0.67 * ld2d { z0.d, z1.d }, p0/z, [x0]
+# CHECK-NEXT: 4 8 0.67 * ld2d { z21.d, z22.d }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT: 4 8 0.67 * ld2d { z23.d, z24.d }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT: 6 9 0.67 * ld2d { z5.d, z6.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT: 6 9 0.67 * ld2h { z0.h, z1.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: 4 8 0.67 * ld2h { z0.h, z1.h }, p0/z, [x0]
+# CHECK-NEXT: 4 8 0.67 * ld2h { z21.h, z22.h }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT: 4 8 0.67 * ld2h { z23.h, z24.h }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT: 6 9 0.67 * ld2h { z5.h, z6.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT: 6 9 0.67 * ld2w { z0.s, z1.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: 4 8 0.67 * ld2w { z0.s, z1.s }, p0/z, [x0]
+# CHECK-NEXT: 4 8 0.67 * ld2w { z21.s, z22.s }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT: 4 8 0.67 * ld2w { z23.s, z24.s }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT: 6 9 0.67 * ld2w { z5.s, z6.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT: 9 10 1.00 * ld3b { z0.b - z2.b }, p0/z, [x0, x0]
+# CHECK-NEXT: 6 9 1.00 * ld3b { z0.b - z2.b }, p0/z, [x0]
+# CHECK-NEXT: 6 9 1.00 * ld3b { z21.b - z23.b }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: 6 9 1.00 * ld3b { z23.b - z25.b }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: 9 10 1.00 * ld3b { z5.b - z7.b }, p3/z, [x17, x16]
+# CHECK-NEXT: 9 10 1.00 * ld3d { z0.d - z2.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: 6 9 1.00 * ld3d { z0.d - z2.d }, p0/z, [x0]
+# CHECK-NEXT: 6 9 1.00 * ld3d { z21.d - z23.d }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: 6 9 1.00 * ld3d { z23.d - z25.d }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: 9 10 1.00 * ld3d { z5.d - z7.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT: 9 10 1.00 * ld3h { z0.h - z2.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: 6 9 1.00 * ld3h { z0.h - z2.h }, p0/z, [x0]
+# CHECK-NEXT: 6 9 1.00 * ld3h { z21.h - z23.h }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: 6 9 1.00 * ld3h { z23.h - z25.h }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: 9 10 1.00 * ld3h { z5.h - z7.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT: 9 10 1.00 * ld3w { z0.s - z2.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: 6 9 1.00 * ld3w { z0.s - z2.s }, p0/z, [x0]
+# CHECK-NEXT: 6 9 1.00 * ld3w { z21.s - z23.s }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: 6 9 1.00 * ld3w { z23.s - z25.s }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: 9 10 1.00 * ld3w { z5.s - z7.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT: 16 10 2.00 * ld4b { z0.b - z3.b }, p0/z, [x0, x0]
+# CHECK-NEXT: 12 9 2.00 * ld4b { z0.b - z3.b }, p0/z, [x0]
+# CHECK-NEXT: 12 9 2.00 * ld4b { z21.b - z24.b }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: 12 9 2.00 * ld4b { z23.b - z26.b }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: 16 10 2.00 * ld4b { z5.b - z8.b }, p3/z, [x17, x16]
+# CHECK-NEXT: 16 10 2.00 * ld4d { z0.d - z3.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: 12 9 2.00 * ld4d { z0.d - z3.d }, p0/z, [x0]
+# CHECK-NEXT: 12 9 2.00 * ld4d { z21.d - z24.d }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: 12 9 2.00 * ld4d { z23.d - z26.d }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: 16 10 2.00 * ld4d { z5.d - z8.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT: 16 10 2.00 * ld4h { z0.h - z3.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: 12 9 2.00 * ld4h { z0.h - z3.h }, p0/z, [x0]
+# CHECK-NEXT: 12 9 2.00 * ld4h { z21.h - z24.h }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: 12 9 2.00 * ld4h { z23.h - z26.h }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: 16 10 2.00 * ld4h { z5.h - z8.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT: 16 10 2.00 * ld4w { z0.s - z3.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: 12 9 2.00 * ld4w { z0.s - z3.s }, p0/z, [x0]
+# CHECK-NEXT: 12 9 2.00 * ld4w { z21.s - z24.s }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: 12 9 2.00 * ld4w { z23.s - z26.s }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: 16 10 2.00 * ld4w { z5.s - z8.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT: 2 6 0.33 * U ldff1b { z0.d }, p0/z, [x0, x0]
+# CHECK-NEXT: 5 9 1.00 * U ldff1b { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 2 6 0.33 * U ldff1b { z0.h }, p0/z, [x0, x0]
+# CHECK-NEXT: 2 6 0.33 * U ldff1b { z0.s }, p0/z, [x0, x0]
+# CHECK-NEXT: 5 9 1.00 * U ldff1b { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: 5 9 1.00 * U ldff1b { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: 5 9 1.00 * U ldff1b { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: 3 9 0.50 * U ldff1b { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 3 9 0.50 * U ldff1b { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 2 6 0.33 * U ldff1b { z31.b }, p7/z, [sp]
+# CHECK-NEXT: 3 9 0.50 * U ldff1b { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 2 6 0.33 * U ldff1b { z31.d }, p7/z, [sp]
+# CHECK-NEXT: 5 9 1.00 * U ldff1b { z31.d }, p7/z, [z31.d, #31]
+# CHECK-NEXT: 2 6 0.33 * U ldff1b { z31.h }, p7/z, [sp]
+# CHECK-NEXT: 2 6 0.33 * U ldff1b { z31.s }, p7/z, [sp]
+# CHECK-NEXT: 5 9 1.00 * U ldff1b { z31.s }, p7/z, [z31.s, #31]
+# CHECK-NEXT: 2 6 0.33 * U ldff1d { z0.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: 5 10 1.00 * U ldff1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
+# CHECK-NEXT: 5 10 1.00 * U ldff1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
+# CHECK-NEXT: 5 9 1.00 * U ldff1d { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 3 9 0.50 * U ldff1d { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 3 9 0.50 * U ldff1d { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 5 10 1.00 * U ldff1d { z23.d }, p3/z, [x13, z8.d, lsl #3]
+# CHECK-NEXT: 3 9 0.50 * U ldff1d { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 2 6 0.33 * U ldff1d { z31.d }, p7/z, [sp]
+# CHECK-NEXT: 5 9 1.00 * U ldff1d { z31.d }, p7/z, [z31.d, #248]
+# CHECK-NEXT: 2 6 0.33 * U ldff1h { z0.d }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: 5 10 1.00 * U ldff1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
+# CHECK-NEXT: 5 10 1.00 * U ldff1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
+# CHECK-NEXT: 5 9 1.00 * U ldff1h { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 2 6 0.33 * U ldff1h { z0.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: 2 6 0.33 * U ldff1h { z0.s }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: 5 9 1.00 * U ldff1h { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: 5 9 1.00 * U ldff1h { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: 5 9 1.00 * U ldff1h { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: 3 9 0.50 * U ldff1h { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 3 9 0.50 * U ldff1h { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 5 10 1.00 * U ldff1h { z23.d }, p3/z, [x13, z8.d, lsl #1]
+# CHECK-NEXT: 3 9 0.50 * U ldff1h { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 2 6 0.33 * U ldff1h { z31.d }, p7/z, [sp]
+# CHECK-NEXT: 5 9 1.00 * U ldff1h { z31.d }, p7/z, [z31.d, #62]
+# CHECK-NEXT: 2 6 0.33 * U ldff1h { z31.h }, p7/z, [sp]
+# CHECK-NEXT: 9 10 2.00 * U ldff1h { z31.s }, p7/z, [sp, z31.s, sxtw #1]
+# CHECK-NEXT: 9 10 2.00 * U ldff1h { z31.s }, p7/z, [sp, z31.s, uxtw #1]
+# CHECK-NEXT: 2 6 0.33 * U ldff1h { z31.s }, p7/z, [sp]
+# CHECK-NEXT: 5 9 1.00 * U ldff1h { z31.s }, p7/z, [z31.s, #62]
+# CHECK-NEXT: 2 6 0.33 * U ldff1sb { z0.d }, p0/z, [x0, x0]
+# CHECK-NEXT: 5 9 1.00 * U ldff1sb { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 2 6 0.33 * U ldff1sb { z0.h }, p0/z, [x0, x0]
+# CHECK-NEXT: 2 6 0.33 * U ldff1sb { z0.s }, p0/z, [x0, x0]
+# CHECK-NEXT: 5 9 1.00 * U ldff1sb { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: 5 9 1.00 * U ldff1sb { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: 5 9 1.00 * U ldff1sb { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: 3 9 0.50 * U ldff1sb { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 3 9 0.50 * U ldff1sb { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 3 9 0.50 * U ldff1sb { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 2 6 0.33 * U ldff1sb { z31.d }, p7/z, [sp]
+# CHECK-NEXT: 5 9 1.00 * U ldff1sb { z31.d }, p7/z, [z31.d, #31]
+# CHECK-NEXT: 2 6 0.33 * U ldff1sb { z31.h }, p7/z, [sp]
+# CHECK-NEXT: 2 6 0.33 * U ldff1sb { z31.s }, p7/z, [sp]
+# CHECK-NEXT: 5 9 1.00 * U ldff1sb { z31.s }, p7/z, [z31.s, #31]
+# CHECK-NEXT: 2 6 0.33 * U ldff1sh { z0.d }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: 5 10 1.00 * U ldff1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1]
+# CHECK-NEXT: 5 10 1.00 * U ldff1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
+# CHECK-NEXT: 5 9 1.00 * U ldff1sh { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 2 6 0.33 * U ldff1sh { z0.s }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: 5 9 1.00 * U ldff1sh { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: 5 9 1.00 * U ldff1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: 5 9 1.00 * U ldff1sh { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: 3 9 0.50 * U ldff1sh { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 3 9 0.50 * U ldff1sh { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 5 10 1.00 * U ldff1sh { z23.d }, p3/z, [x13, z8.d, lsl #1]
+# CHECK-NEXT: 3 9 0.50 * U ldff1sh { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 2 6 0.33 * U ldff1sh { z31.d }, p7/z, [sp]
+# CHECK-NEXT: 5 9 1.00 * U ldff1sh { z31.d }, p7/z, [z31.d, #62]
+# CHECK-NEXT: 9 10 2.00 * U ldff1sh { z31.s }, p7/z, [sp, z31.s, sxtw #1]
+# CHECK-NEXT: 9 10 2.00 * U ldff1sh { z31.s }, p7/z, [sp, z31.s, uxtw #1]
+# CHECK-NEXT: 2 6 0.33 * U ldff1sh { z31.s }, p7/z, [sp]
+# CHECK-NEXT: 5 9 1.00 * U ldff1sh { z31.s }, p7/z, [z31.s, #62]
+# CHECK-NEXT: 2 6 0.33 * U ldff1sw { z0.d }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: 5 10 1.00 * U ldff1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2]
+# CHECK-NEXT: 5 10 1.00 * U ldff1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2]
+# CHECK-NEXT: 5 9 1.00 * U ldff1sw { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 3 9 0.50 * U ldff1sw { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 3 9 0.50 * U ldff1sw { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 5 10 1.00 * U ldff1sw { z23.d }, p3/z, [x13, z8.d, lsl #2]
+# CHECK-NEXT: 3 9 0.50 * U ldff1sw { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 2 6 0.33 * U ldff1sw { z31.d }, p7/z, [sp]
+# CHECK-NEXT: 5 9 1.00 * U ldff1sw { z31.d }, p7/z, [z31.d, #124]
+# CHECK-NEXT: 2 6 0.33 * U ldff1w { z0.d }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: 5 10 1.00 * U ldff1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
+# CHECK-NEXT: 5 10 1.00 * U ldff1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
+# CHECK-NEXT: 5 9 1.00 * U ldff1w { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 2 6 0.33 * U ldff1w { z0.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: 5 9 1.00 * U ldff1w { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: 5 9 1.00 * U ldff1w { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: 5 9 1.00 * U ldff1w { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: 3 9 0.50 * U ldff1w { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 3 9 0.50 * U ldff1w { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 5 10 1.00 * U ldff1w { z23.d }, p3/z, [x13, z8.d, lsl #2]
+# CHECK-NEXT: 3 9 0.50 * U ldff1w { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 2 6 0.33 * U ldff1w { z31.d }, p7/z, [sp]
+# CHECK-NEXT: 5 9 1.00 * U ldff1w { z31.d }, p7/z, [z31.d, #124]
+# CHECK-NEXT: 9 10 2.00 * U ldff1w { z31.s }, p7/z, [sp, z31.s, sxtw #2]
+# CHECK-NEXT: 9 10 2.00 * U ldff1w { z31.s }, p7/z, [sp, z31.s, uxtw #2]
+# CHECK-NEXT: 2 6 0.33 * U ldff1w { z31.s }, p7/z, [sp]
+# CHECK-NEXT: 5 9 1.00 * U ldff1w { z31.s }, p7/z, [z31.s, #124]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1b { z0.b }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1b { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1b { z0.h }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1b { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1b { z21.b }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1b { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1b { z21.h }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1b { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1b { z31.b }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1b { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1b { z31.h }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1b { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1d { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1d { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1d { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1h { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1h { z0.h }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1h { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1h { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1h { z21.h }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1h { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1h { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1h { z31.h }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1h { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1sb { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1sb { z0.h }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1sb { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1sb { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1sb { z21.h }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1sb { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1sb { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1sb { z31.h }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1sb { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1sh { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1sh { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1sh { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1sh { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1sh { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1sh { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1sw { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1sw { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1sw { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1w { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1w { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1w { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1w { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1w { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1w { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ldnt1b { z0.b }, p0/z, [x0, x0]
+# CHECK-NEXT: 1 6 0.33 * ldnt1b { z0.b }, p0/z, [x0]
+# CHECK-NEXT: 4 9 0.67 * ldnt1b { z0.d }, p0/z, [z1.d]
+# CHECK-NEXT: 6 9 1.00 * ldnt1b { z0.s }, p0/z, [z1.s]
+# CHECK-NEXT: 1 6 0.33 * ldnt1b { z21.b }, p5/z, [x10, #7, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ldnt1b { z23.b }, p3/z, [x13, #-8, mul vl]
+# CHECK-NEXT: 4 9 0.67 * ldnt1b { z31.d }, p7/z, [z31.d, x0]
+# CHECK-NEXT: 4 9 0.67 * ldnt1b { z31.d }, p7/z, [z31.d]
+# CHECK-NEXT: 6 9 1.00 * ldnt1b { z31.s }, p7/z, [z31.s, x0]
+# CHECK-NEXT: 6 9 1.00 * ldnt1b { z31.s }, p7/z, [z31.s]
+# CHECK-NEXT: 1 6 0.33 * ldnt1d { z0.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: 1 6 0.33 * ldnt1d { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 4 9 0.67 * ldnt1d { z0.d }, p0/z, [z1.d]
+# CHECK-NEXT: 1 6 0.33 * ldnt1d { z21.d }, p5/z, [x10, #7, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ldnt1d { z23.d }, p3/z, [x13, #-8, mul vl]
+# CHECK-NEXT: 4 9 0.67 * ldnt1d { z31.d }, p7/z, [z31.d, x0]
+# CHECK-NEXT: 4 9 0.67 * ldnt1d { z31.d }, p7/z, [z31.d]
+# CHECK-NEXT: 4 9 0.67 * ldnt1h { z0.d }, p0/z, [z1.d]
+# CHECK-NEXT: 1 6 0.33 * ldnt1h { z0.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: 1 6 0.33 * ldnt1h { z0.h }, p0/z, [x0]
+# CHECK-NEXT: 6 9 1.00 * ldnt1h { z0.s }, p0/z, [z1.s]
+# CHECK-NEXT: 1 6 0.33 * ldnt1h { z21.h }, p5/z, [x10, #7, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ldnt1h { z23.h }, p3/z, [x13, #-8, mul vl]
+# CHECK-NEXT: 4 9 0.67 * ldnt1h { z31.d }, p7/z, [z31.d, x0]
+# CHECK-NEXT: 4 9 0.67 * ldnt1h { z31.d }, p7/z, [z31.d]
+# CHECK-NEXT: 6 9 1.00 * ldnt1h { z31.s }, p7/z, [z31.s, x0]
+# CHECK-NEXT: 6 9 1.00 * ldnt1h { z31.s }, p7/z, [z31.s]
+# CHECK-NEXT: 4 9 0.67 * ldnt1sb { z0.d }, p0/z, [z1.d]
+# CHECK-NEXT: 6 9 1.00 * ldnt1sb { z0.s }, p0/z, [z1.s]
+# CHECK-NEXT: 4 9 0.67 * ldnt1sb { z31.d }, p7/z, [z31.d, x0]
+# CHECK-NEXT: 4 9 0.67 * ldnt1sb { z31.d }, p7/z, [z31.d]
+# CHECK-NEXT: 6 9 1.00 * ldnt1sb { z31.s }, p7/z, [z31.s, x0]
+# CHECK-NEXT: 6 9 1.00 * ldnt1sb { z31.s }, p7/z, [z31.s]
+# CHECK-NEXT: 4 9 0.67 * ldnt1sh { z0.d }, p0/z, [z1.d]
+# CHECK-NEXT: 6 9 1.00 * ldnt1sh { z0.s }, p0/z, [z1.s]
+# CHECK-NEXT: 4 9 0.67 * ldnt1sh { z31.d }, p7/z, [z31.d, x0]
+# CHECK-NEXT: 4 9 0.67 * ldnt1sh { z31.d }, p7/z, [z31.d]
+# CHECK-NEXT: 6 9 1.00 * ldnt1sh { z31.s }, p7/z, [z31.s, x0]
+# CHECK-NEXT: 6 9 1.00 * ldnt1sh { z31.s }, p7/z, [z31.s]
+# CHECK-NEXT: 4 9 0.67 * ldnt1sw { z0.d }, p0/z, [z1.d]
+# CHECK-NEXT: 4 9 0.67 * ldnt1sw { z31.d }, p7/z, [z31.d, x0]
+# CHECK-NEXT: 4 9 0.67 * ldnt1sw { z31.d }, p7/z, [z31.d]
+# CHECK-NEXT: 4 9 0.67 * ldnt1w { z0.d }, p0/z, [z1.d]
+# CHECK-NEXT: 1 6 0.33 * ldnt1w { z0.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: 1 6 0.33 * ldnt1w { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 6 9 1.00 * ldnt1w { z0.s }, p0/z, [z1.s]
+# CHECK-NEXT: 1 6 0.33 * ldnt1w { z21.s }, p5/z, [x10, #7, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ldnt1w { z23.s }, p3/z, [x13, #-8, mul vl]
+# CHECK-NEXT: 4 9 0.67 * ldnt1w { z31.d }, p7/z, [z31.d, x0]
+# CHECK-NEXT: 4 9 0.67 * ldnt1w { z31.d }, p7/z, [z31.d]
+# CHECK-NEXT: 6 9 1.00 * ldnt1w { z31.s }, p7/z, [z31.s, x0]
+# CHECK-NEXT: 6 9 1.00 * ldnt1w { z31.s }, p7/z, [z31.s]
+# CHECK-NEXT: 2 6 0.50 * ldr p0, [x0]
+# CHECK-NEXT: 2 6 0.50 * ldr p5, [x10, #255, mul vl]
+# CHECK-NEXT: 2 6 0.50 * ldr p7, [x13, #-256, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ldr z0, [x0]
+# CHECK-NEXT: 1 6 0.33 * ldr z23, [x13, #255, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ldr z31, [sp, #-256, mul vl]
+# CHECK-NEXT: 1 2 0.50 lsl z0.b, p0/m, z0.b, #0
+# CHECK-NEXT: 1 2 0.50 lsl z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: 1 2 0.50 lsl z0.b, p0/m, z0.b, z1.d
+# CHECK-NEXT: 1 2 0.50 lsl z0.b, z0.b, #0
+# CHECK-NEXT: 1 2 0.50 lsl z0.b, z1.b, z2.d
+# CHECK-NEXT: 1 2 0.50 lsl z0.d, p0/m, z0.d, #0
+# CHECK-NEXT: 1 2 0.50 lsl z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: 1 2 0.50 lsl z0.d, z0.d, #0
+# CHECK-NEXT: 1 2 0.50 lsl z0.h, p0/m, z0.h, #0
+# CHECK-NEXT: 1 2 0.50 lsl z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: 1 2 0.50 lsl z0.h, p0/m, z0.h, z1.d
+# CHECK-NEXT: 1 2 0.50 lsl z0.h, z0.h, #0
+# CHECK-NEXT: 1 2 0.50 lsl z0.h, z1.h, z2.d
+# CHECK-NEXT: 1 2 0.50 lsl z0.s, p0/m, z0.s, #0
+# CHECK-NEXT: 1 2 0.50 lsl z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: 1 2 0.50 lsl z0.s, p0/m, z0.s, z1.d
+# CHECK-NEXT: 1 2 0.50 lsl z0.s, z0.s, #0
+# CHECK-NEXT: 1 2 0.50 lsl z0.s, z1.s, z2.d
+# CHECK-NEXT: 1 2 0.50 lsl z31.b, p0/m, z31.b, #7
+# CHECK-NEXT: 1 2 0.50 lsl z31.b, z31.b, #7
+# CHECK-NEXT: 1 2 0.50 lsl z31.d, p0/m, z31.d, #63
+# CHECK-NEXT: 1 2 0.50 lsl z31.d, z31.d, #63
+# CHECK-NEXT: 1 2 0.50 lsl z31.h, p0/m, z31.h, #15
+# CHECK-NEXT: 1 2 0.50 lsl z31.h, z31.h, #15
+# CHECK-NEXT: 1 2 0.50 lsl z31.s, p0/m, z31.s, #31
+# CHECK-NEXT: 1 2 0.50 lsl z31.s, z31.s, #31
+# CHECK-NEXT: 1 2 0.50 lslr z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: 1 2 0.50 lslr z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: 1 2 0.50 lslr z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: 1 2 0.50 lslr z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: 1 2 0.50 lsr z0.b, p0/m, z0.b, #1
+# CHECK-NEXT: 1 2 0.50 lsr z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: 1 2 0.50 lsr z0.b, p0/m, z0.b, z1.d
+# CHECK-NEXT: 1 2 0.50 lsr z0.b, z0.b, #1
+# CHECK-NEXT: 1 2 0.50 lsr z0.b, z1.b, z2.d
+# CHECK-NEXT: 1 2 0.50 lsr z0.d, p0/m, z0.d, #1
+# CHECK-NEXT: 1 2 0.50 lsr z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: 1 2 0.50 lsr z0.d, z0.d, #1
+# CHECK-NEXT: 1 2 0.50 lsr z0.h, p0/m, z0.h, #1
+# CHECK-NEXT: 1 2 0.50 lsr z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: 1 2 0.50 lsr z0.h, p0/m, z0.h, z1.d
+# CHECK-NEXT: 1 2 0.50 lsr z0.h, z0.h, #1
+# CHECK-NEXT: 1 2 0.50 lsr z0.h, z1.h, z2.d
+# CHECK-NEXT: 1 2 0.50 lsr z0.s, p0/m, z0.s, #1
+# CHECK-NEXT: 1 2 0.50 lsr z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: 1 2 0.50 lsr z0.s, p0/m, z0.s, z1.d
+# CHECK-NEXT: 1 2 0.50 lsr z0.s, z0.s, #1
+# CHECK-NEXT: 1 2 0.50 lsr z0.s, z1.s, z2.d
+# CHECK-NEXT: 1 2 0.50 lsr z31.b, p0/m, z31.b, #8
+# CHECK-NEXT: 1 2 0.50 lsr z31.b, z31.b, #8
+# CHECK-NEXT: 1 2 0.50 lsr z31.d, p0/m, z31.d, #64
+# CHECK-NEXT: 1 2 0.50 lsr z31.d, z31.d, #64
+# CHECK-NEXT: 1 2 0.50 lsr z31.h, p0/m, z31.h, #16
+# CHECK-NEXT: 1 2 0.50 lsr z31.h, z31.h, #16
+# CHECK-NEXT: 1 2 0.50 lsr z31.s, p0/m, z31.s, #32
+# CHECK-NEXT: 1 2 0.50 lsr z31.s, z31.s, #32
+# CHECK-NEXT: 1 2 0.50 lsrr z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: 1 2 0.50 lsrr z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: 1 2 0.50 lsrr z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: 1 2 0.50 lsrr z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: 1 4 0.50 mad z0.b, p7/m, z1.b, z31.b
+# CHECK-NEXT: 1 5 1.00 mad z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: 1 4 0.50 mad z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.50 mad z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: 2 3 1.00 match p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: 2 3 1.00 match p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: 2 2 1.00 match p15.b, p7/z, z30.b, z31.b
+# CHECK-NEXT: 2 2 1.00 match p15.h, p7/z, z30.h, z31.h
+# CHECK-NEXT: 1 4 0.50 mla z0.b, p7/m, z1.b, z31.b
+# CHECK-NEXT: 1 5 1.00 mla z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: 1 5 1.00 mla z0.d, z1.d, z7.d[1]
+# CHECK-NEXT: 1 4 0.50 mla z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.50 mla z0.h, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 0.50 mla z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: 1 4 0.50 mla z0.s, z1.s, z7.s[3]
+# CHECK-NEXT: 1 4 0.50 mls z0.b, p7/m, z1.b, z31.b
+# CHECK-NEXT: 1 5 1.00 mls z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: 1 5 1.00 mls z0.d, z1.d, z7.d[1]
+# CHECK-NEXT: 1 4 0.50 mls z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.50 mls z0.h, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 0.50 mls z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: 1 4 0.50 mls z0.s, z1.s, z7.s[3]
+# CHECK-NEXT: 1 2 0.50 mov p0.b, p0.b
+# CHECK-NEXT: 1 1 0.50 mov p0.b, p0/m, p0.b
+# CHECK-NEXT: 1 2 0.50 mov p0.b, p0/z, p0.b
+# CHECK-NEXT: 1 2 0.50 mov p15.b, p15.b
+# CHECK-NEXT: 1 1 0.50 mov p15.b, p15/m, p15.b
+# CHECK-NEXT: 1 2 0.50 mov p15.b, p15/z, p15.b
+# CHECK-NEXT: 1 2 0.25 mov z0.b, #127
+# CHECK-NEXT: 1 2 0.25 mov z0.b, b0
+# CHECK-NEXT: 1 2 0.25 mov z0.b, p0/m, b0
+# CHECK-NEXT: 2 5 1.00 mov z0.b, p0/m, w0
+# CHECK-NEXT: 1 2 0.25 mov z0.b, p0/z, #127
+# CHECK-NEXT: 1 3 1.00 mov z0.b, w0
+# CHECK-NEXT: 1 2 0.25 mov z0.d, #0
+# CHECK-NEXT: 1 2 0.25 mov z0.d, #0xe0000000000003ff
+# CHECK-NEXT: 1 2 0.25 mov z0.d, #0xffffffffffff7fff
+# CHECK-NEXT: 1 2 0.25 mov z0.d, #32768
+# CHECK-NEXT: 1 2 0.25 mov z0.d, d0
+# CHECK-NEXT: 1 2 0.25 mov z0.d, p0/m, d0
+# CHECK-NEXT: 2 5 1.00 mov z0.d, p0/m, x0
+# CHECK-NEXT: 1 3 1.00 mov z0.d, x0
+# CHECK-NEXT: 1 2 0.25 mov z0.d, z0.d
+# CHECK-NEXT: 1 2 0.25 mov z0.h, #-256
+# CHECK-NEXT: 1 2 0.25 mov z0.h, #-32768
+# CHECK-NEXT: 1 2 0.25 mov z0.h, #0
+# CHECK-NEXT: 1 2 0.25 mov z0.h, #32512
+# CHECK-NEXT: 1 2 0.25 mov z0.h, #32767
+# CHECK-NEXT: 1 2 0.25 mov z0.h, h0
+# CHECK-NEXT: 1 2 0.25 mov z0.h, p0/m, h0
+# CHECK-NEXT: 2 5 1.00 mov z0.h, p0/m, w0
+# CHECK-NEXT: 1 2 0.25 mov z0.h, p0/z, #32512
+# CHECK-NEXT: 1 3 1.00 mov z0.h, w0
+# CHECK-NEXT: 1 2 0.25 mov z0.q, q0
+# CHECK-NEXT: 1 2 0.25 mov z0.s, #0
+# CHECK-NEXT: 1 2 0.25 mov z0.s, #0xffff7fff
+# CHECK-NEXT: 1 2 0.25 mov z0.s, #32768
+# CHECK-NEXT: 1 2 0.25 mov z0.s, p0/m, s0
+# CHECK-NEXT: 2 5 1.00 mov z0.s, p0/m, w0
+# CHECK-NEXT: 1 2 0.25 mov z0.s, s0
+# CHECK-NEXT: 1 3 1.00 mov z0.s, w0
+# CHECK-NEXT: 1 2 0.25 mov z21.d, #-128
+# CHECK-NEXT: 1 2 0.25 mov z21.d, #-32768
+# CHECK-NEXT: 1 2 0.25 mov z21.d, #127
+# CHECK-NEXT: 1 2 0.25 mov z21.d, #32512
+# CHECK-NEXT: 1 2 0.25 mov z21.d, p0/z, #-128
+# CHECK-NEXT: 1 2 0.25 mov z21.d, p0/z, #-32768
+# CHECK-NEXT: 1 2 0.25 mov z21.d, p0/z, #127
+# CHECK-NEXT: 1 2 0.25 mov z21.d, p0/z, #32512
+# CHECK-NEXT: 1 2 0.25 mov z21.d, p15/m, #-128
+# CHECK-NEXT: 1 2 0.25 mov z21.d, p15/m, #-32768
+# CHECK-NEXT: 1 2 0.25 mov z21.h, #-128
+# CHECK-NEXT: 1 2 0.25 mov z21.h, #-32768
+# CHECK-NEXT: 1 2 0.25 mov z21.h, #127
+# CHECK-NEXT: 1 2 0.25 mov z21.h, #32512
+# CHECK-NEXT: 1 2 0.25 mov z21.h, p0/z, #-128
+# CHECK-NEXT: 1 2 0.25 mov z21.h, p0/z, #-32768
+# CHECK-NEXT: 1 2 0.25 mov z21.h, p0/z, #127
+# CHECK-NEXT: 1 2 0.25 mov z21.h, p0/z, #32512
+# CHECK-NEXT: 1 2 0.25 mov z21.h, p15/m, #-128
+# CHECK-NEXT: 1 2 0.25 mov z21.h, p15/m, #-32768
+# CHECK-NEXT: 1 2 0.25 mov z21.s, #-128
+# CHECK-NEXT: 1 2 0.25 mov z21.s, #-32768
+# CHECK-NEXT: 1 2 0.25 mov z21.s, #127
+# CHECK-NEXT: 1 2 0.25 mov z21.s, #32512
+# CHECK-NEXT: 1 2 0.25 mov z21.s, p0/z, #-128
+# CHECK-NEXT: 1 2 0.25 mov z21.s, p0/z, #-32768
+# CHECK-NEXT: 1 2 0.25 mov z21.s, p0/z, #127
+# CHECK-NEXT: 1 2 0.25 mov z21.s, p0/z, #32512
+# CHECK-NEXT: 1 2 0.25 mov z21.s, p15/m, #-128
+# CHECK-NEXT: 1 2 0.25 mov z21.s, p15/m, #-32768
+# CHECK-NEXT: 1 2 0.25 mov z31.b, p15/m, z31.b
+# CHECK-NEXT: 1 2 0.25 mov z31.b, p7/m, b31
+# CHECK-NEXT: 1 2 0.25 movprfx z31, z6
+# CHECK-NEXT: 2 5 1.00 mov z31.b, p7/m, wsp
+# CHECK-NEXT: 1 3 1.00 mov z31.b, wsp
+# CHECK-NEXT: 1 2 0.25 mov z31.b, z31.b[63]
+# CHECK-NEXT: 1 2 0.25 mov z31.d, p15/m, z31.d
+# CHECK-NEXT: 1 2 0.25 mov z31.d, p7/m, d31
+# CHECK-NEXT: 1 2 0.25 movprfx z31.d, p7/z, z6.d
+# CHECK-NEXT: 2 5 1.00 mov z31.d, p7/m, sp
+# CHECK-NEXT: 1 3 1.00 mov z31.d, sp
+# CHECK-NEXT: 1 2 0.25 mov z31.d, z0.d
+# CHECK-NEXT: 1 2 0.25 mov z31.d, z31.d[7]
+# CHECK-NEXT: 1 2 0.25 mov z31.h, p15/m, z31.h
+# CHECK-NEXT: 1 2 0.25 mov z31.h, p7/m, h31
+# CHECK-NEXT: 2 5 1.00 mov z31.h, p7/m, wsp
+# CHECK-NEXT: 1 3 1.00 mov z31.h, wsp
+# CHECK-NEXT: 1 2 0.25 mov z31.h, z31.h[31]
+# CHECK-NEXT: 1 2 0.25 mov z31.s, p15/m, z31.s
+# CHECK-NEXT: 1 2 0.25 mov z31.s, p7/m, s31
+# CHECK-NEXT: 2 5 1.00 mov z31.s, p7/m, wsp
+# CHECK-NEXT: 1 3 1.00 mov z31.s, wsp
+# CHECK-NEXT: 1 2 0.25 mov z31.s, z31.s[15]
+# CHECK-NEXT: 1 2 0.25 mov z5.b, #-1
+# CHECK-NEXT: 1 2 0.25 mov z5.b, #-128
+# CHECK-NEXT: 1 2 0.25 mov z5.b, #127
+# CHECK-NEXT: 1 2 0.25 mov z5.b, p0/z, #-1
+# CHECK-NEXT: 1 2 0.25 mov z5.b, p0/z, #-128
+# CHECK-NEXT: 1 2 0.25 mov z5.b, p0/z, #127
+# CHECK-NEXT: 1 2 0.25 mov z5.b, p15/m, #-128
+# CHECK-NEXT: 1 2 0.25 mov z5.d, #-6
+# CHECK-NEXT: 1 2 0.25 mov z5.h, #-6
+# CHECK-NEXT: 1 2 0.25 mov z5.q, z17.q[3]
+# CHECK-NEXT: 1 2 0.25 mov z5.s, #-6
+# CHECK-NEXT: 1 2 0.50 movs p0.b, p0.b
+# CHECK-NEXT: 1 2 0.50 movs p0.b, p0/z, p0.b
+# CHECK-NEXT: 1 2 0.50 movs p15.b, p15.b
+# CHECK-NEXT: 1 2 0.50 movs p15.b, p15/z, p15.b
+# CHECK-NEXT: 1 1 0.10 U mrs x3, ID_AA64ZFR0_EL1
+# CHECK-NEXT: 1 1 0.10 U mrs x3, ZCR_EL1
+# CHECK-NEXT: 1 1 0.10 U mrs x3, ZCR_EL12
+# CHECK-NEXT: 1 1 0.10 U mrs x3, ZCR_EL2
+# CHECK-NEXT: 1 1 0.10 U mrs x3, ZCR_EL3
+# CHECK-NEXT: 1 4 0.50 msb z0.b, p7/m, z1.b, z31.b
+# CHECK-NEXT: 1 5 1.00 msb z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: 1 4 0.50 msb z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.50 msb z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: 1 1 0.10 U msr ZCR_EL1, x3
+# CHECK-NEXT: 1 1 0.10 U msr ZCR_EL12, x3
+# CHECK-NEXT: 1 1 0.10 U msr ZCR_EL2, x3
+# CHECK-NEXT: 1 1 0.10 U msr ZCR_EL3, x3
+# CHECK-NEXT: 1 4 0.50 mul z0.b, p7/m, z0.b, z31.b
+# CHECK-NEXT: 1 4 0.50 mul z0.b, z1.b, z2.b
+# CHECK-NEXT: 2 5 1.00 mul z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 2 5 1.00 mul z0.d, z1.d, z15.d[1]
+# CHECK-NEXT: 1 4 0.50 mul z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 1 4 0.50 mul z0.h, z1.h, z2.h
+# CHECK-NEXT: 1 4 0.50 mul z0.h, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 0.50 mul z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 4 0.50 mul z0.s, z1.s, z7.s[3]
+# CHECK-NEXT: 1 4 0.50 mul z29.s, z30.s, z31.s
+# CHECK-NEXT: 1 4 0.50 mul z31.b, z31.b, #-128
+# CHECK-NEXT: 1 4 0.50 mul z31.b, z31.b, #127
+# CHECK-NEXT: 2 5 1.00 mul z31.d, z31.d, #-128
+# CHECK-NEXT: 2 5 1.00 mul z31.d, z31.d, #127
+# CHECK-NEXT: 2 5 1.00 mul z31.d, z31.d, z31.d
+# CHECK-NEXT: 1 4 0.50 mul z31.h, z31.h, #-128
+# CHECK-NEXT: 1 4 0.50 mul z31.h, z31.h, #127
+# CHECK-NEXT: 1 4 0.50 mul z31.s, z31.s, #-128
+# CHECK-NEXT: 1 4 0.50 mul z31.s, z31.s, #127
+# CHECK-NEXT: 1 2 0.50 nand p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: 1 2 0.50 nand p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 1 2 0.50 nands p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: 1 2 0.50 nands p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 1 2 0.25 nbsl z0.d, z0.d, z1.d, z2.d
+# CHECK-NEXT: 1 2 0.25 neg z0.b, p0/m, z0.b
+# CHECK-NEXT: 1 2 0.25 neg z0.d, p0/m, z0.d
+# CHECK-NEXT: 1 2 0.25 neg z0.h, p0/m, z0.h
+# CHECK-NEXT: 1 2 0.25 neg z0.s, p0/m, z0.s
+# CHECK-NEXT: 1 2 0.25 neg z31.b, p7/m, z31.b
+# CHECK-NEXT: 1 2 0.25 neg z31.d, p7/m, z31.d
+# CHECK-NEXT: 1 2 0.25 neg z31.h, p7/m, z31.h
+# CHECK-NEXT: 1 2 0.25 neg z31.s, p7/m, z31.s
+# CHECK-NEXT: 2 3 1.00 nmatch p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: 2 3 1.00 nmatch p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: 2 2 1.00 nmatch p15.b, p7/z, z30.b, z31.b
+# CHECK-NEXT: 2 2 1.00 nmatch p15.h, p7/z, z30.h, z31.h
+# CHECK-NEXT: 1 2 0.50 nor p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: 1 2 0.50 nor p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 1 2 0.50 nors p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: 1 2 0.50 nors p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 1 2 0.50 not p0.b, p0/z, p0.b
+# CHECK-NEXT: 1 2 0.50 not p15.b, p15/z, p15.b
+# CHECK-NEXT: 1 2 0.25 not z31.b, p7/m, z31.b
+# CHECK-NEXT: 1 2 0.25 not z31.d, p7/m, z31.d
+# CHECK-NEXT: 1 2 0.25 not z31.h, p7/m, z31.h
+# CHECK-NEXT: 1 2 0.25 not z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 2 0.50 nots p0.b, p0/z, p0.b
+# CHECK-NEXT: 1 2 0.50 nots p15.b, p15/z, p15.b
+# CHECK-NEXT: 1 2 0.50 orn p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: 1 2 0.50 orn p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 1 2 0.50 orns p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: 1 2 0.50 orns p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 1 2 0.50 orr p0.b, p0/z, p0.b, p1.b
+# CHECK-NEXT: 1 2 0.25 orr z0.d, z0.d, #0x6
+# CHECK-NEXT: 1 2 0.25 orr z0.d, z0.d, #0xfffffffffffffff9
+# CHECK-NEXT: 1 2 0.25 orr z0.s, z0.s, #0x6
+# CHECK-NEXT: 1 2 0.25 orr z0.s, z0.s, #0xfffffff9
+# CHECK-NEXT: 1 2 0.25 orr z23.d, z13.d, z8.d
+# CHECK-NEXT: 1 2 0.25 orr z23.h, z23.h, #0x6
+# CHECK-NEXT: 1 2 0.25 orr z23.h, z23.h, #0xfff9
+# CHECK-NEXT: 1 2 0.25 orr z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: 1 2 0.25 orr z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: 1 2 0.25 orr z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: 1 2 0.25 orr z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.25 orr z5.b, z5.b, #0x6
+# CHECK-NEXT: 1 2 0.25 orr z5.b, z5.b, #0xf9
+# CHECK-NEXT: 1 2 0.50 orrs p0.b, p0/z, p0.b, p1.b
+# CHECK-NEXT: 2 6 0.50 orv b0, p7, z31.b
+# CHECK-NEXT: 2 6 0.50 orv d0, p7, z31.d
+# CHECK-NEXT: 2 6 0.50 orv h0, p7, z31.h
+# CHECK-NEXT: 2 6 0.50 orv s0, p7, z31.s
+# CHECK-NEXT: 1 2 0.50 pfalse p15.b
+# CHECK-NEXT: 1 2 0.50 pfirst p0.b, p15, p0.b
+# CHECK-NEXT: 1 2 0.50 pfirst p15.b, p15, p15.b
+# CHECK-NEXT: 1 2 0.25 pmul z0.b, z1.b, z2.b
+# CHECK-NEXT: 1 2 0.25 pmul z29.b, z30.b, z31.b
+# CHECK-NEXT: 1 2 0.25 pmullb z0.h, z1.b, z2.b
+# CHECK-NEXT: 1 2 0.25 pmullb z29.q, z30.d, z31.d
+# CHECK-NEXT: 1 2 0.25 pmullb z31.d, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.25 pmullt z0.h, z1.b, z2.b
+# CHECK-NEXT: 1 2 0.25 pmullt z29.q, z30.d, z31.d
+# CHECK-NEXT: 1 2 0.25 pmullt z31.d, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.50 pnext p0.b, p15, p0.b
+# CHECK-NEXT: 1 2 0.50 pnext p0.d, p15, p0.d
+# CHECK-NEXT: 1 2 0.50 pnext p0.h, p15, p0.h
+# CHECK-NEXT: 1 2 0.50 pnext p0.s, p15, p0.s
+# CHECK-NEXT: 1 2 0.50 pnext p15.b, p15, p15.b
+# CHECK-NEXT: 1 4 0.33 * * U prfb #14, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfb #15, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfb #6, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfb #7, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfb #7, p3, [z13.s, #31]
+# CHECK-NEXT: 1 4 0.33 * * U prfb #7, p3, [z13.s]
+# CHECK-NEXT: 1 4 0.33 * * U prfb pldl1keep, p0, [x0, z0.d, uxtw]
+# CHECK-NEXT: 1 4 0.33 * * U prfb pldl1keep, p0, [x0, z0.d]
+# CHECK-NEXT: 1 4 0.33 * * U prfb pldl1keep, p0, [x0, z0.s, uxtw]
+# CHECK-NEXT: 1 4 0.33 * * U prfb pldl1keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfb pldl1strm, p0, [x0, #-32, mul vl]
+# CHECK-NEXT: 1 4 0.33 * * U prfb pldl1strm, p0, [x0, #31, mul vl]
+# CHECK-NEXT: 1 4 0.33 * * U prfb pldl1strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfb pldl2keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfb pldl2strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfb pldl3keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfb pldl3strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfb pldl3strm, p5, [x10, z21.d, sxtw]
+# CHECK-NEXT: 1 4 0.33 * * U prfb pldl3strm, p5, [x10, z21.s, uxtw]
+# CHECK-NEXT: 1 4 0.33 * * U prfb pldl3strm, p5, [z10.d, #31]
+# CHECK-NEXT: 1 4 0.33 * * U prfb pldl3strm, p5, [z10.d]
+# CHECK-NEXT: 1 4 0.33 * * U prfb pstl1keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfb pstl1strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfb pstl2keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfb pstl2strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfb pstl3keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfb pstl3strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfd #14, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfd #15, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfd #15, p7, [z31.d, #248]
+# CHECK-NEXT: 1 4 0.33 * * U prfd #15, p7, [z31.d]
+# CHECK-NEXT: 1 4 0.33 * * U prfd #15, p7, [z31.s, #248]
+# CHECK-NEXT: 1 4 0.33 * * U prfd #15, p7, [z31.s]
+# CHECK-NEXT: 1 4 0.33 * * U prfd #6, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfd #7, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfd pldl1keep, p0, [x0, z0.d, lsl #3]
+# CHECK-NEXT: 1 4 0.33 * * U prfd pldl1keep, p0, [x0, z0.d, sxtw #3]
+# CHECK-NEXT: 1 4 0.33 * * U prfd pldl1keep, p0, [x0, z0.d, uxtw #3]
+# CHECK-NEXT: 1 4 0.33 * * U prfd pldl1keep, p0, [x0, z0.s, sxtw #3]
+# CHECK-NEXT: 1 4 0.33 * * U prfd pldl1keep, p0, [x0, z0.s, uxtw #3]
+# CHECK-NEXT: 1 4 0.33 * * U prfd pldl1keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfd pldl1strm, p0, [x0, #-32, mul vl]
+# CHECK-NEXT: 1 4 0.33 * * U prfd pldl1strm, p0, [x0, #31, mul vl]
+# CHECK-NEXT: 1 4 0.33 * * U prfd pldl1strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfd pldl2keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfd pldl2strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfd pldl3keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfd pldl3strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfd pstl1keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfd pstl1strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfd pstl2keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfd pstl2strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfd pstl3keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfd pstl3strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfh #14, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfh #15, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfh #15, p7, [z31.d, #62]
+# CHECK-NEXT: 1 4 0.33 * * U prfh #15, p7, [z31.d]
+# CHECK-NEXT: 1 4 0.33 * * U prfh #15, p7, [z31.s, #62]
+# CHECK-NEXT: 1 4 0.33 * * U prfh #15, p7, [z31.s]
+# CHECK-NEXT: 1 4 0.33 * * U prfh #6, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfh #7, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfh pldl1keep, p0, [x0, z0.d, lsl #1]
+# CHECK-NEXT: 1 4 0.33 * * U prfh pldl1keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfh pldl1strm, p0, [x0, #-32, mul vl]
+# CHECK-NEXT: 1 4 0.33 * * U prfh pldl1strm, p0, [x0, #31, mul vl]
+# CHECK-NEXT: 1 4 0.33 * * U prfh pldl1strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfh pldl2keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfh pldl2strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfh pldl3keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfh pldl3strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfh pldl3strm, p5, [x10, z21.d, sxtw #1]
+# CHECK-NEXT: 1 4 0.33 * * U prfh pldl3strm, p5, [x10, z21.d, uxtw #1]
+# CHECK-NEXT: 1 4 0.33 * * U prfh pldl3strm, p5, [x10, z21.s, sxtw #1]
+# CHECK-NEXT: 1 4 0.33 * * U prfh pldl3strm, p5, [x10, z21.s, uxtw #1]
+# CHECK-NEXT: 1 4 0.33 * * U prfh pstl1keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfh pstl1strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfh pstl2keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfh pstl2strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfh pstl3keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfh pstl3strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfw #14, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfw #15, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfw #15, p7, [z31.d, #124]
+# CHECK-NEXT: 1 4 0.33 * * U prfw #15, p7, [z31.d]
+# CHECK-NEXT: 1 4 0.33 * * U prfw #15, p7, [z31.s, #124]
+# CHECK-NEXT: 1 4 0.33 * * U prfw #15, p7, [z31.s]
+# CHECK-NEXT: 1 4 0.33 * * U prfw #6, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfw #7, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfw #7, p3, [x13, z8.d, uxtw #2]
+# CHECK-NEXT: 1 4 0.33 * * U prfw pldl1keep, p0, [x0, z0.d, sxtw #2]
+# CHECK-NEXT: 1 4 0.33 * * U prfw pldl1keep, p0, [x0, z0.s, uxtw #2]
+# CHECK-NEXT: 1 4 0.33 * * U prfw pldl1keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfw pldl1strm, p0, [x0, #-32, mul vl]
+# CHECK-NEXT: 1 4 0.33 * * U prfw pldl1strm, p0, [x0, #31, mul vl]
+# CHECK-NEXT: 1 4 0.33 * * U prfw pldl1strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfw pldl2keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfw pldl2strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfw pldl3keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfw pldl3strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfw pldl3strm, p5, [x10, z21.d, lsl #2]
+# CHECK-NEXT: 1 4 0.33 * * U prfw pldl3strm, p5, [x10, z21.s, sxtw #2]
+# CHECK-NEXT: 1 4 0.33 * * U prfw pstl1keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfw pstl1strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfw pstl2keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfw pstl2strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfw pstl3keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfw pstl3strm, p0, [x0]
+# CHECK-NEXT: 1 1 0.50 ptest p15, p0.b
+# CHECK-NEXT: 1 1 0.50 ptest p15, p15.b
+# CHECK-NEXT: 1 2 0.50 ptrue p0.b, pow2
+# CHECK-NEXT: 1 2 0.50 ptrue p0.d, pow2
+# CHECK-NEXT: 1 2 0.50 ptrue p0.h, pow2
+# CHECK-NEXT: 1 2 0.50 ptrue p0.s, pow2
+# CHECK-NEXT: 1 2 0.50 ptrue p15.b
+# CHECK-NEXT: 1 2 0.50 ptrue p15.d
+# CHECK-NEXT: 1 2 0.50 ptrue p15.h
+# CHECK-NEXT: 1 2 0.50 ptrue p15.s
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, #14
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, #15
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, #16
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, #17
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, #18
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, #19
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, #20
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, #21
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, #22
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, #23
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, #24
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, #25
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, #26
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, #27
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, #28
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, mul3
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, mul4
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, vl1
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, vl128
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, vl16
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, vl2
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, vl256
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, vl3
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, vl32
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, vl4
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, vl5
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, vl6
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, vl64
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, vl7
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, vl8
+# CHECK-NEXT: 1 2 0.50 ptrues p0.b, pow2
+# CHECK-NEXT: 1 2 0.50 ptrues p0.d, pow2
+# CHECK-NEXT: 1 2 0.50 ptrues p0.h, pow2
+# CHECK-NEXT: 1 2 0.50 ptrues p0.s, pow2
+# CHECK-NEXT: 1 2 0.50 ptrues p15.b
+# CHECK-NEXT: 1 2 0.50 ptrues p15.d
+# CHECK-NEXT: 1 2 0.50 ptrues p15.h
+# CHECK-NEXT: 1 2 0.50 ptrues p15.s
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, #14
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, #15
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, #16
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, #17
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, #18
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, #19
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, #20
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, #21
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, #22
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, #23
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, #24
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, #25
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, #26
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, #27
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, #28
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, mul3
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, mul4
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, vl1
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, vl128
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, vl16
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, vl2
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, vl256
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, vl3
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, vl32
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, vl4
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, vl5
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, vl6
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, vl64
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, vl7
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, vl8
+# CHECK-NEXT: 1 2 0.50 punpkhi p0.h, p0.b
+# CHECK-NEXT: 1 2 0.50 punpkhi p15.h, p15.b
+# CHECK-NEXT: 1 2 0.50 punpklo p0.h, p0.b
+# CHECK-NEXT: 1 2 0.50 punpklo p15.h, p15.b
+# CHECK-NEXT: 1 2 0.25 raddhnb z0.b, z1.h, z31.h
+# CHECK-NEXT: 1 2 0.25 raddhnb z0.h, z1.s, z31.s
+# CHECK-NEXT: 1 2 0.25 raddhnb z0.s, z1.d, z31.d
+# CHECK-NEXT: 1 2 0.25 raddhnt z0.b, z1.h, z31.h
+# CHECK-NEXT: 1 2 0.25 raddhnt z0.h, z1.s, z31.s
+# CHECK-NEXT: 1 2 0.25 raddhnt z0.s, z1.d, z31.d
+# CHECK-NEXT: 1 2 0.25 rax1 z0.d, z1.d, z31.d
+# CHECK-NEXT: 1 2 0.25 rbit z0.b, p7/m, z31.b
+# CHECK-NEXT: 1 2 0.25 rbit z0.d, p7/m, z31.d
+# CHECK-NEXT: 1 2 0.25 rbit z0.h, p7/m, z31.h
+# CHECK-NEXT: 1 2 0.25 rbit z0.s, p7/m, z31.s
+# CHECK-NEXT: 1 2 1.00 * U rdffr p0.b
+# CHECK-NEXT: 2 4 1.00 * U rdffr p0.b, p0/z
+# CHECK-NEXT: 1 2 1.00 * U rdffr p15.b
+# CHECK-NEXT: 2 4 1.00 * U rdffr p15.b, p15/z
+# CHECK-NEXT: 2 4 1.00 U rdffrs p0.b, p0/z
+# CHECK-NEXT: 2 4 1.00 U rdffrs p15.b, p15/z
+# CHECK-NEXT: 1 2 0.50 rdvl x0, #0
+# CHECK-NEXT: 1 2 0.50 rdvl x21, #-32
+# CHECK-NEXT: 1 2 0.50 rdvl x23, #31
+# CHECK-NEXT: 1 2 0.50 rdvl xzr, #-1
+# CHECK-NEXT: 1 2 0.50 rev p0.b, p1.b
+# CHECK-NEXT: 1 2 0.50 rev p0.d, p1.d
+# CHECK-NEXT: 1 2 0.50 rev p0.h, p1.h
+# CHECK-NEXT: 1 2 0.50 rev p0.s, p1.s
+# CHECK-NEXT: 1 2 0.25 rev z0.b, z31.b
+# CHECK-NEXT: 1 2 0.25 rev z0.d, z31.d
+# CHECK-NEXT: 1 2 0.25 rev z0.h, z31.h
+# CHECK-NEXT: 1 2 0.25 rev z0.s, z31.s
+# CHECK-NEXT: 1 2 0.25 revb z0.d, p7/m, z31.d
+# CHECK-NEXT: 1 2 0.25 revb z0.h, p7/m, z31.h
+# CHECK-NEXT: 1 2 0.25 revb z0.s, p7/m, z31.s
+# CHECK-NEXT: 1 2 0.25 revh z0.d, p7/m, z31.d
+# CHECK-NEXT: 1 2 0.25 revh z0.s, p7/m, z31.s
+# CHECK-NEXT: 1 2 0.25 revw z0.d, p7/m, z31.d
+# CHECK-NEXT: 1 4 0.25 rshrnb z0.b, z0.h, #1
+# CHECK-NEXT: 1 4 0.25 rshrnb z0.h, z0.s, #1
+# CHECK-NEXT: 1 4 0.25 rshrnb z0.s, z0.d, #1
+# CHECK-NEXT: 1 4 0.25 rshrnb z31.b, z31.h, #8
+# CHECK-NEXT: 1 4 0.25 rshrnb z31.h, z31.s, #16
+# CHECK-NEXT: 1 4 0.25 rshrnb z31.s, z31.d, #32
+# CHECK-NEXT: 1 4 0.25 rshrnt z0.b, z0.h, #1
+# CHECK-NEXT: 1 4 0.25 rshrnt z0.h, z0.s, #1
+# CHECK-NEXT: 1 4 0.25 rshrnt z0.s, z0.d, #1
+# CHECK-NEXT: 1 4 0.25 rshrnt z31.b, z31.h, #8
+# CHECK-NEXT: 1 4 0.25 rshrnt z31.h, z31.s, #16
+# CHECK-NEXT: 1 4 0.25 rshrnt z31.s, z31.d, #32
+# CHECK-NEXT: 1 2 0.25 rsubhnb z0.b, z1.h, z31.h
+# CHECK-NEXT: 1 2 0.25 rsubhnb z0.h, z1.s, z31.s
+# CHECK-NEXT: 1 2 0.25 rsubhnb z0.s, z1.d, z31.d
+# CHECK-NEXT: 1 2 0.25 rsubhnt z0.b, z1.h, z31.h
+# CHECK-NEXT: 1 2 0.25 rsubhnt z0.h, z1.s, z31.s
+# CHECK-NEXT: 1 2 0.25 rsubhnt z0.s, z1.d, z31.d
+# CHECK-NEXT: 1 4 0.25 saba z0.b, z1.b, z31.b
+# CHECK-NEXT: 1 4 0.25 saba z0.d, z1.d, z31.d
+# CHECK-NEXT: 1 4 0.25 saba z0.h, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.25 saba z0.s, z1.s, z31.s
+# CHECK-NEXT: 1 4 0.25 sabalb z0.d, z1.s, z31.s
+# CHECK-NEXT: 1 4 0.25 sabalb z0.h, z1.b, z31.b
+# CHECK-NEXT: 1 4 0.25 sabalb z0.s, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.25 sabalt z0.d, z1.s, z31.s
+# CHECK-NEXT: 1 4 0.25 sabalt z0.h, z1.b, z31.b
+# CHECK-NEXT: 1 4 0.25 sabalt z0.s, z1.h, z31.h
+# CHECK-NEXT: 1 2 0.25 sabd z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: 1 2 0.25 sabd z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: 1 2 0.25 sabd z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: 1 2 0.25 sabd z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.25 sabdlb z0.h, z1.b, z2.b
+# CHECK-NEXT: 1 2 0.25 sabdlb z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 2 0.25 sabdlb z31.d, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.25 sabdlt z0.h, z1.b, z2.b
+# CHECK-NEXT: 1 2 0.25 sabdlt z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 2 0.25 sabdlt z31.d, z31.s, z31.s
+# CHECK-NEXT: 1 4 0.25 sadalp z0.h, p0/m, z1.b
+# CHECK-NEXT: 1 4 0.25 sadalp z29.s, p0/m, z30.h
+# CHECK-NEXT: 1 4 0.25 sadalp z30.d, p7/m, z31.s
+# CHECK-NEXT: 1 2 0.25 saddlb z0.h, z1.b, z2.b
+# CHECK-NEXT: 1 2 0.25 saddlb z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 2 0.25 saddlb z31.d, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.25 saddlbt z0.d, z1.s, z31.s
+# CHECK-NEXT: 1 2 0.25 saddlbt z0.h, z1.b, z31.b
+# CHECK-NEXT: 1 2 0.25 saddlbt z0.s, z1.h, z31.h
+# CHECK-NEXT: 1 2 0.25 saddlt z0.h, z1.b, z2.b
+# CHECK-NEXT: 1 2 0.25 saddlt z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 2 0.25 saddlt z31.d, z31.s, z31.s
+# CHECK-NEXT: 6 9 2.00 saddv d0, p7, z31.b
+# CHECK-NEXT: 4 8 1.00 saddv d0, p7, z31.h
+# CHECK-NEXT: 4 6 1.00 saddv d0, p7, z31.s
+# CHECK-NEXT: 1 2 0.25 saddwb z0.h, z1.h, z2.b
+# CHECK-NEXT: 1 2 0.25 saddwb z29.s, z30.s, z31.h
+# CHECK-NEXT: 1 2 0.25 saddwb z31.d, z31.d, z31.s
+# CHECK-NEXT: 1 2 0.25 saddwt z0.h, z1.h, z2.b
+# CHECK-NEXT: 1 2 0.25 saddwt z29.s, z30.s, z31.h
+# CHECK-NEXT: 1 2 0.25 saddwt z31.d, z31.d, z31.s
+# CHECK-NEXT: 1 2 0.25 sbclb z0.d, z1.d, z31.d
+# CHECK-NEXT: 1 2 0.25 sbclb z0.s, z1.s, z31.s
+# CHECK-NEXT: 1 2 0.25 sbclt z0.d, z1.d, z31.d
+# CHECK-NEXT: 1 2 0.25 sbclt z0.s, z1.s, z31.s
+# CHECK-NEXT: 1 3 0.50 scvtf z0.d, p0/m, z0.d
+# CHECK-NEXT: 1 3 0.50 scvtf z0.d, p0/m, z0.s
+# CHECK-NEXT: 1 3 0.50 scvtf z0.h, p0/m, z0.d
+# CHECK-NEXT: 4 6 2.00 scvtf z0.h, p0/m, z0.h
+# CHECK-NEXT: 2 4 1.00 scvtf z0.h, p0/m, z0.s
+# CHECK-NEXT: 1 3 0.50 scvtf z0.s, p0/m, z0.d
+# CHECK-NEXT: 2 4 1.00 scvtf z0.s, p0/m, z0.s
+# CHECK-NEXT: 1 20 20.00 sdiv z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 12 11.00 sdiv z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 20 20.00 sdivr z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 12 11.00 sdivr z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 3 0.50 sdot z0.d, z1.h, z15.h[1]
+# CHECK-NEXT: 1 3 0.50 sdot z0.d, z1.h, z31.h
+# CHECK-NEXT: 1 3 0.25 sdot z0.s, z1.b, z31.b
+# CHECK-NEXT: 1 3 0.25 sdot z0.s, z1.b, z7.b[3]
+# CHECK-NEXT: 1 1 0.50 sel p0.b, p1, p2.b, p3.b
+# CHECK-NEXT: 1 2 0.25 sel z23.b, p11, z13.b, z8.b
+# CHECK-NEXT: 1 2 0.25 sel z23.d, p11, z13.d, z8.d
+# CHECK-NEXT: 1 2 0.25 sel z23.h, p11, z13.h, z8.h
+# CHECK-NEXT: 1 2 0.25 sel z23.s, p11, z13.s, z8.s
+# CHECK-NEXT: 1 2 1.00 * U setffr
+# CHECK-NEXT: 1 2 0.25 shadd z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 2 0.25 shadd z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.25 shadd z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.25 shadd z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 2 0.25 shrnb z0.b, z0.h, #1
+# CHECK-NEXT: 1 2 0.25 shrnb z0.h, z0.s, #1
+# CHECK-NEXT: 1 2 0.25 shrnb z0.s, z0.d, #1
+# CHECK-NEXT: 1 2 0.25 shrnb z31.b, z31.h, #8
+# CHECK-NEXT: 1 2 0.25 shrnb z31.h, z31.s, #16
+# CHECK-NEXT: 1 2 0.25 shrnb z31.s, z31.d, #32
+# CHECK-NEXT: 1 2 0.25 shrnt z0.b, z0.h, #1
+# CHECK-NEXT: 1 2 0.25 shrnt z0.h, z0.s, #1
+# CHECK-NEXT: 1 2 0.25 shrnt z0.s, z0.d, #1
+# CHECK-NEXT: 1 2 0.25 shrnt z31.b, z31.h, #8
+# CHECK-NEXT: 1 2 0.25 shrnt z31.h, z31.s, #16
+# CHECK-NEXT: 1 2 0.25 shrnt z31.s, z31.d, #32
+# CHECK-NEXT: 1 2 0.25 shsub z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 2 0.25 shsub z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.25 shsub z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.25 shsub z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 2 0.25 shsubr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 2 0.25 shsubr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.25 shsubr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.25 shsubr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 2 0.25 sli z0.b, z0.b, #0
+# CHECK-NEXT: 1 2 0.25 sli z0.d, z0.d, #0
+# CHECK-NEXT: 1 2 0.25 sli z0.h, z0.h, #0
+# CHECK-NEXT: 1 2 0.25 sli z0.s, z0.s, #0
+# CHECK-NEXT: 1 2 0.25 sli z31.b, z31.b, #7
+# CHECK-NEXT: 1 2 0.25 sli z31.d, z31.d, #63
+# CHECK-NEXT: 1 2 0.25 sli z31.h, z31.h, #15
+# CHECK-NEXT: 1 2 0.25 sli z31.s, z31.s, #31
+# CHECK-NEXT: 1 4 1.00 sm4e z0.s, z0.s, z31.s
+# CHECK-NEXT: 1 4 1.00 sm4ekey z0.s, z1.s, z31.s
+# CHECK-NEXT: 1 2 0.25 smax z0.b, z0.b, #-128
+# CHECK-NEXT: 1 2 0.25 smax z0.d, z0.d, #-128
+# CHECK-NEXT: 1 2 0.25 smax z0.h, z0.h, #-128
+# CHECK-NEXT: 1 2 0.25 smax z0.s, z0.s, #-128
+# CHECK-NEXT: 1 2 0.25 smax z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: 1 2 0.25 smax z31.b, z31.b, #127
+# CHECK-NEXT: 1 2 0.25 smax z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: 1 2 0.25 smax z31.d, z31.d, #127
+# CHECK-NEXT: 1 2 0.25 smax z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: 1 2 0.25 smax z31.h, z31.h, #127
+# CHECK-NEXT: 1 2 0.25 smax z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.25 smax z31.s, z31.s, #127
+# CHECK-NEXT: 1 2 0.25 smaxp z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 2 0.25 smaxp z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.25 smaxp z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.25 smaxp z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 6 9 2.00 smaxv b0, p7, z31.b
+# CHECK-NEXT: 2 4 0.50 smaxv d0, p7, z31.d
+# CHECK-NEXT: 4 8 1.00 smaxv h0, p7, z31.h
+# CHECK-NEXT: 4 6 1.00 smaxv s0, p7, z31.s
+# CHECK-NEXT: 1 2 0.25 smin z0.b, z0.b, #-128
+# CHECK-NEXT: 1 2 0.25 smin z0.d, z0.d, #-128
+# CHECK-NEXT: 1 2 0.25 smin z0.h, z0.h, #-128
+# CHECK-NEXT: 1 2 0.25 smin z0.s, z0.s, #-128
+# CHECK-NEXT: 1 2 0.25 smin z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: 1 2 0.25 smin z31.b, z31.b, #127
+# CHECK-NEXT: 1 2 0.25 smin z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: 1 2 0.25 smin z31.d, z31.d, #127
+# CHECK-NEXT: 1 2 0.25 smin z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: 1 2 0.25 smin z31.h, z31.h, #127
+# CHECK-NEXT: 1 2 0.25 smin z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.25 smin z31.s, z31.s, #127
+# CHECK-NEXT: 1 2 0.25 sminp z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 2 0.25 sminp z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.25 sminp z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.25 sminp z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 6 9 2.00 sminv b0, p7, z31.b
+# CHECK-NEXT: 2 4 0.50 sminv d0, p7, z31.d
+# CHECK-NEXT: 4 8 1.00 sminv h0, p7, z31.h
+# CHECK-NEXT: 4 6 1.00 sminv s0, p7, z31.s
+# CHECK-NEXT: 1 4 0.50 smlalb z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: 1 4 0.50 smlalb z0.d, z1.s, z31.s
+# CHECK-NEXT: 1 4 0.50 smlalb z0.h, z1.b, z31.b
+# CHECK-NEXT: 1 4 0.50 smlalb z0.s, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.50 smlalb z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 0.50 smlalt z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: 1 4 0.50 smlalt z0.d, z1.s, z31.s
+# CHECK-NEXT: 1 4 0.50 smlalt z0.h, z1.b, z31.b
+# CHECK-NEXT: 1 4 0.50 smlalt z0.s, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.50 smlalt z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 0.50 smlslb z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: 1 4 0.50 smlslb z0.d, z1.s, z31.s
+# CHECK-NEXT: 1 4 0.50 smlslb z0.h, z1.b, z31.b
+# CHECK-NEXT: 1 4 0.50 smlslb z0.s, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.50 smlslb z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 0.50 smlslt z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: 1 4 0.50 smlslt z0.d, z1.s, z31.s
+# CHECK-NEXT: 1 4 0.50 smlslt z0.h, z1.b, z31.b
+# CHECK-NEXT: 1 4 0.50 smlslt z0.s, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.50 smlslt z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: 1 3 0.25 smmla z0.s, z1.b, z2.b
+# CHECK-NEXT: 1 4 0.50 smulh z0.b, p7/m, z0.b, z31.b
+# CHECK-NEXT: 1 4 0.50 smulh z0.b, z1.b, z2.b
+# CHECK-NEXT: 2 5 1.00 smulh z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 4 0.50 smulh z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 1 4 0.50 smulh z0.h, z1.h, z2.h
+# CHECK-NEXT: 1 4 0.50 smulh z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 4 0.50 smulh z29.s, z30.s, z31.s
+# CHECK-NEXT: 2 5 1.00 smulh z31.d, z31.d, z31.d
+# CHECK-NEXT: 1 4 0.50 smullb z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: 1 4 0.50 smullb z0.h, z1.b, z2.b
+# CHECK-NEXT: 1 4 0.50 smullb z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 0.50 smullb z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 4 0.50 smullb z31.d, z31.s, z31.s
+# CHECK-NEXT: 1 4 0.50 smullt z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: 1 4 0.50 smullt z0.h, z1.b, z2.b
+# CHECK-NEXT: 1 4 0.50 smullt z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 0.50 smullt z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 4 0.50 smullt z31.d, z31.s, z31.s
+# CHECK-NEXT: 1 3 1.00 splice z29.b, p7, { z30.b, z31.b }
+# CHECK-NEXT: 1 3 1.00 splice z29.d, p7, { z30.d, z31.d }
+# CHECK-NEXT: 1 3 1.00 splice z29.h, p7, { z30.h, z31.h }
+# CHECK-NEXT: 1 3 1.00 splice z29.s, p7, { z30.s, z31.s }
+# CHECK-NEXT: 1 3 1.00 splice z31.b, p7, z31.b, z31.b
+# CHECK-NEXT: 1 3 1.00 splice z31.d, p7, z31.d, z31.d
+# CHECK-NEXT: 1 3 1.00 splice z31.h, p7, z31.h, z31.h
+# CHECK-NEXT: 1 3 1.00 splice z31.s, p7, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.25 sqabs z31.b, p7/m, z31.b
+# CHECK-NEXT: 1 2 0.25 sqabs z31.d, p7/m, z31.d
+# CHECK-NEXT: 1 2 0.25 sqabs z31.h, p7/m, z31.h
+# CHECK-NEXT: 1 2 0.25 sqabs z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 2 0.25 sqadd z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 2 0.25 sqadd z0.b, z0.b, #0
+# CHECK-NEXT: 1 2 0.25 sqadd z0.b, z0.b, z0.b
+# CHECK-NEXT: 1 2 0.25 sqadd z0.d, z0.d, #0
+# CHECK-NEXT: 1 2 0.25 sqadd z0.d, z0.d, #0, lsl #8
+# CHECK-NEXT: 1 2 0.25 sqadd z0.d, z0.d, z0.d
+# CHECK-NEXT: 1 2 0.25 sqadd z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.25 sqadd z0.h, z0.h, #0
+# CHECK-NEXT: 1 2 0.25 sqadd z0.h, z0.h, #0, lsl #8
+# CHECK-NEXT: 1 2 0.25 sqadd z0.h, z0.h, z0.h
+# CHECK-NEXT: 1 2 0.25 sqadd z0.s, z0.s, #0
+# CHECK-NEXT: 1 2 0.25 sqadd z0.s, z0.s, #0, lsl #8
+# CHECK-NEXT: 1 2 0.25 sqadd z0.s, z0.s, z0.s
+# CHECK-NEXT: 1 2 0.25 sqadd z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.25 sqadd z31.b, z31.b, #255
+# CHECK-NEXT: 1 2 0.25 sqadd z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 2 0.25 sqadd z31.d, z31.d, #65280
+# CHECK-NEXT: 1 2 0.25 sqadd z31.h, z31.h, #65280
+# CHECK-NEXT: 1 2 0.25 sqadd z31.s, z31.s, #65280
+# CHECK-NEXT: 1 2 0.25 sqcadd z0.b, z0.b, z0.b, #90
+# CHECK-NEXT: 1 2 0.25 sqcadd z0.d, z0.d, z0.d, #90
+# CHECK-NEXT: 1 2 0.25 sqcadd z0.h, z0.h, z0.h, #90
+# CHECK-NEXT: 1 2 0.25 sqcadd z0.s, z0.s, z0.s, #90
+# CHECK-NEXT: 1 2 0.25 sqcadd z31.b, z31.b, z31.b, #270
+# CHECK-NEXT: 1 2 0.25 sqcadd z31.d, z31.d, z31.d, #270
+# CHECK-NEXT: 1 2 0.25 sqcadd z31.h, z31.h, z31.h, #270
+# CHECK-NEXT: 1 2 0.25 sqcadd z31.s, z31.s, z31.s, #270
+# CHECK-NEXT: 1 2 0.50 sqdecb x0
+# CHECK-NEXT: 1 2 0.50 sqdecb x0, #14
+# CHECK-NEXT: 1 2 0.50 sqdecb x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqdecb x0, pow2
+# CHECK-NEXT: 1 2 0.50 sqdecb x0, vl1
+# CHECK-NEXT: 1 2 0.50 sqdecb x0, w0
+# CHECK-NEXT: 1 2 0.50 sqdecb x0, w0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqdecb x0, w0, pow2
+# CHECK-NEXT: 1 2 0.50 sqdecb x0, w0, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 sqdecd x0
+# CHECK-NEXT: 1 2 0.50 sqdecd x0, #14
+# CHECK-NEXT: 1 2 0.50 sqdecd x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqdecd x0, pow2
+# CHECK-NEXT: 1 2 0.50 sqdecd x0, vl1
+# CHECK-NEXT: 1 2 0.50 sqdecd x0, w0
+# CHECK-NEXT: 1 2 0.50 sqdecd x0, w0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqdecd x0, w0, pow2
+# CHECK-NEXT: 1 2 0.50 sqdecd x0, w0, pow2, mul #16
+# CHECK-NEXT: 1 2 0.25 sqdecd z0.d
+# CHECK-NEXT: 1 2 0.25 sqdecd z0.d, all, mul #16
+# CHECK-NEXT: 1 2 0.25 sqdecd z0.d, pow2
+# CHECK-NEXT: 1 2 0.25 sqdecd z0.d, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 sqdech x0
+# CHECK-NEXT: 1 2 0.50 sqdech x0, #14
+# CHECK-NEXT: 1 2 0.50 sqdech x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqdech x0, pow2
+# CHECK-NEXT: 1 2 0.50 sqdech x0, vl1
+# CHECK-NEXT: 1 2 0.50 sqdech x0, w0
+# CHECK-NEXT: 1 2 0.50 sqdech x0, w0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqdech x0, w0, pow2
+# CHECK-NEXT: 1 2 0.50 sqdech x0, w0, pow2, mul #16
+# CHECK-NEXT: 1 2 0.25 sqdech z0.h
+# CHECK-NEXT: 1 2 0.25 sqdech z0.h, all, mul #16
+# CHECK-NEXT: 1 2 0.25 sqdech z0.h, pow2
+# CHECK-NEXT: 1 2 0.25 sqdech z0.h, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 sqdecp x0, p0.b
+# CHECK-NEXT: 1 2 0.50 sqdecp x0, p0.d
+# CHECK-NEXT: 1 2 0.50 sqdecp x0, p0.h
+# CHECK-NEXT: 1 2 0.50 sqdecp x0, p0.s
+# CHECK-NEXT: 1 2 0.50 sqdecp xzr, p15.b, wzr
+# CHECK-NEXT: 1 2 0.50 sqdecp xzr, p15.d, wzr
+# CHECK-NEXT: 1 2 0.50 sqdecp xzr, p15.h, wzr
+# CHECK-NEXT: 1 2 0.50 sqdecp xzr, p15.s, wzr
+# CHECK-NEXT: 3 7 1.00 sqdecp z0.d, p0.d
+# CHECK-NEXT: 3 7 1.00 sqdecp z0.h, p0.h
+# CHECK-NEXT: 3 7 1.00 sqdecp z0.s, p0.s
+# CHECK-NEXT: 1 2 0.50 sqdecw x0
+# CHECK-NEXT: 1 2 0.50 sqdecw x0, #14
+# CHECK-NEXT: 1 2 0.50 sqdecw x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqdecw x0, pow2
+# CHECK-NEXT: 1 2 0.50 sqdecw x0, vl1
+# CHECK-NEXT: 1 2 0.50 sqdecw x0, w0
+# CHECK-NEXT: 1 2 0.50 sqdecw x0, w0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqdecw x0, w0, pow2
+# CHECK-NEXT: 1 2 0.50 sqdecw x0, w0, pow2, mul #16
+# CHECK-NEXT: 1 2 0.25 sqdecw z0.s
+# CHECK-NEXT: 1 2 0.25 sqdecw z0.s, all, mul #16
+# CHECK-NEXT: 1 2 0.25 sqdecw z0.s, pow2
+# CHECK-NEXT: 1 2 0.25 sqdecw z0.s, pow2, mul #16
+# CHECK-NEXT: 1 4 0.50 sqdmlalb z0.d, z1.s, z15.s[3]
+# CHECK-NEXT: 1 4 0.50 sqdmlalb z0.d, z1.s, z31.s
+# CHECK-NEXT: 1 4 0.50 sqdmlalb z0.h, z1.b, z31.b
+# CHECK-NEXT: 1 4 0.50 sqdmlalb z0.s, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.50 sqdmlalb z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 0.50 sqdmlalbt z0.d, z1.s, z31.s
+# CHECK-NEXT: 1 4 0.50 sqdmlalbt z0.h, z1.b, z31.b
+# CHECK-NEXT: 1 4 0.50 sqdmlalbt z0.s, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.50 sqdmlalt z0.d, z1.s, z15.s[3]
+# CHECK-NEXT: 1 4 0.50 sqdmlalt z0.d, z1.s, z31.s
+# CHECK-NEXT: 1 4 0.50 sqdmlalt z0.h, z1.b, z31.b
+# CHECK-NEXT: 1 4 0.50 sqdmlalt z0.s, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.50 sqdmlalt z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 0.50 sqdmlslb z0.d, z1.s, z15.s[3]
+# CHECK-NEXT: 1 4 0.50 sqdmlslb z0.d, z1.s, z31.s
+# CHECK-NEXT: 1 4 0.50 sqdmlslb z0.h, z1.b, z31.b
+# CHECK-NEXT: 1 4 0.50 sqdmlslb z0.s, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.50 sqdmlslb z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 0.50 sqdmlslbt z0.d, z1.s, z31.s
+# CHECK-NEXT: 1 4 0.50 sqdmlslbt z0.h, z1.b, z31.b
+# CHECK-NEXT: 1 4 0.50 sqdmlslbt z0.s, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.50 sqdmlslt z0.d, z1.s, z15.s[3]
+# CHECK-NEXT: 1 4 0.50 sqdmlslt z0.d, z1.s, z31.s
+# CHECK-NEXT: 1 4 0.50 sqdmlslt z0.h, z1.b, z31.b
+# CHECK-NEXT: 1 4 0.50 sqdmlslt z0.s, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.50 sqdmlslt z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 0.50 sqdmulh z0.b, z1.b, z2.b
+# CHECK-NEXT: 2 5 1.00 sqdmulh z0.d, z1.d, z15.d[1]
+# CHECK-NEXT: 1 4 0.50 sqdmulh z0.h, z1.h, z2.h
+# CHECK-NEXT: 1 4 0.50 sqdmulh z0.h, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 0.50 sqdmulh z0.s, z1.s, z7.s[3]
+# CHECK-NEXT: 1 4 0.50 sqdmulh z29.s, z30.s, z31.s
+# CHECK-NEXT: 2 5 1.00 sqdmulh z31.d, z31.d, z31.d
+# CHECK-NEXT: 1 4 0.50 sqdmullb z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: 1 4 0.50 sqdmullb z0.h, z1.b, z2.b
+# CHECK-NEXT: 1 4 0.50 sqdmullb z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 0.50 sqdmullb z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 4 0.50 sqdmullb z31.d, z31.s, z31.s
+# CHECK-NEXT: 1 4 0.50 sqdmullt z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: 1 4 0.50 sqdmullt z0.h, z1.b, z2.b
+# CHECK-NEXT: 1 4 0.50 sqdmullt z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 0.50 sqdmullt z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 4 0.50 sqdmullt z31.d, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.50 sqincb x0
+# CHECK-NEXT: 1 2 0.50 sqincb x0, #14
+# CHECK-NEXT: 1 2 0.50 sqincb x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqincb x0, pow2
+# CHECK-NEXT: 1 2 0.50 sqincb x0, vl1
+# CHECK-NEXT: 1 2 0.50 sqincb x0, w0
+# CHECK-NEXT: 1 2 0.50 sqincb x0, w0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqincb x0, w0, pow2
+# CHECK-NEXT: 1 2 0.50 sqincb x0, w0, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 sqincd x0
+# CHECK-NEXT: 1 2 0.50 sqincd x0, #14
+# CHECK-NEXT: 1 2 0.50 sqincd x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqincd x0, pow2
+# CHECK-NEXT: 1 2 0.50 sqincd x0, vl1
+# CHECK-NEXT: 1 2 0.50 sqincd x0, w0
+# CHECK-NEXT: 1 2 0.50 sqincd x0, w0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqincd x0, w0, pow2
+# CHECK-NEXT: 1 2 0.50 sqincd x0, w0, pow2, mul #16
+# CHECK-NEXT: 1 2 0.25 sqincd z0.d
+# CHECK-NEXT: 1 2 0.25 sqincd z0.d, all, mul #16
+# CHECK-NEXT: 1 2 0.25 sqincd z0.d, pow2
+# CHECK-NEXT: 1 2 0.25 sqincd z0.d, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 sqinch x0
+# CHECK-NEXT: 1 2 0.50 sqinch x0, #14
+# CHECK-NEXT: 1 2 0.50 sqinch x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqinch x0, pow2
+# CHECK-NEXT: 1 2 0.50 sqinch x0, vl1
+# CHECK-NEXT: 1 2 0.50 sqinch x0, w0
+# CHECK-NEXT: 1 2 0.50 sqinch x0, w0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqinch x0, w0, pow2
+# CHECK-NEXT: 1 2 0.50 sqinch x0, w0, pow2, mul #16
+# CHECK-NEXT: 1 2 0.25 sqinch z0.h
+# CHECK-NEXT: 1 2 0.25 sqinch z0.h, all, mul #16
+# CHECK-NEXT: 1 2 0.25 sqinch z0.h, pow2
+# CHECK-NEXT: 1 2 0.25 sqinch z0.h, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 sqincp x0, p0.b
+# CHECK-NEXT: 1 2 0.50 sqincp x0, p0.d
+# CHECK-NEXT: 1 2 0.50 sqincp x0, p0.h
+# CHECK-NEXT: 1 2 0.50 sqincp x0, p0.s
+# CHECK-NEXT: 1 2 0.50 sqincp xzr, p15.b, wzr
+# CHECK-NEXT: 1 2 0.50 sqincp xzr, p15.d, wzr
+# CHECK-NEXT: 1 2 0.50 sqincp xzr, p15.h, wzr
+# CHECK-NEXT: 1 2 0.50 sqincp xzr, p15.s, wzr
+# CHECK-NEXT: 3 7 1.00 sqincp z0.d, p0.d
+# CHECK-NEXT: 3 7 1.00 sqincp z0.h, p0.h
+# CHECK-NEXT: 3 7 1.00 sqincp z0.s, p0.s
+# CHECK-NEXT: 1 2 0.50 sqincw x0
+# CHECK-NEXT: 1 2 0.50 sqincw x0, #14
+# CHECK-NEXT: 1 2 0.50 sqincw x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqincw x0, pow2
+# CHECK-NEXT: 1 2 0.50 sqincw x0, vl1
+# CHECK-NEXT: 1 2 0.50 sqincw x0, w0
+# CHECK-NEXT: 1 2 0.50 sqincw x0, w0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqincw x0, w0, pow2
+# CHECK-NEXT: 1 2 0.50 sqincw x0, w0, pow2, mul #16
+# CHECK-NEXT: 1 2 0.25 sqincw z0.s
+# CHECK-NEXT: 1 2 0.25 sqincw z0.s, all, mul #16
+# CHECK-NEXT: 1 2 0.25 sqincw z0.s, pow2
+# CHECK-NEXT: 1 2 0.25 sqincw z0.s, pow2, mul #16
+# CHECK-NEXT: 1 2 0.25 sqneg z31.b, p7/m, z31.b
+# CHECK-NEXT: 1 2 0.25 sqneg z31.d, p7/m, z31.d
+# CHECK-NEXT: 1 2 0.25 sqneg z31.h, p7/m, z31.h
+# CHECK-NEXT: 1 2 0.25 sqneg z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 4 0.50 sqrdcmlah z0.b, z1.b, z2.b, #0
+# CHECK-NEXT: 1 5 1.00 sqrdcmlah z0.d, z1.d, z2.d, #0
+# CHECK-NEXT: 1 4 0.50 sqrdcmlah z0.h, z1.h, z2.h, #0
+# CHECK-NEXT: 1 4 0.50 sqrdcmlah z0.h, z1.h, z2.h[0], #0
+# CHECK-NEXT: 1 4 0.50 sqrdcmlah z0.s, z1.s, z2.s, #0
+# CHECK-NEXT: 1 4 0.50 sqrdcmlah z0.s, z1.s, z2.s[0], #0
+# CHECK-NEXT: 1 4 0.50 sqrdcmlah z15.b, z16.b, z17.b, #270
+# CHECK-NEXT: 1 5 1.00 sqrdcmlah z15.d, z16.d, z17.d, #270
+# CHECK-NEXT: 1 4 0.50 sqrdcmlah z15.h, z16.h, z17.h, #270
+# CHECK-NEXT: 1 4 0.50 sqrdcmlah z15.s, z16.s, z17.s, #270
+# CHECK-NEXT: 1 4 0.50 sqrdcmlah z29.b, z30.b, z31.b, #90
+# CHECK-NEXT: 1 5 1.00 sqrdcmlah z29.d, z30.d, z31.d, #90
+# CHECK-NEXT: 1 4 0.50 sqrdcmlah z29.h, z30.h, z31.h, #90
+# CHECK-NEXT: 1 4 0.50 sqrdcmlah z29.s, z30.s, z31.s, #90
+# CHECK-NEXT: 1 4 0.50 sqrdcmlah z31.b, z31.b, z31.b, #180
+# CHECK-NEXT: 1 5 1.00 sqrdcmlah z31.d, z31.d, z31.d, #180
+# CHECK-NEXT: 1 4 0.50 sqrdcmlah z31.h, z30.h, z7.h[0], #180
+# CHECK-NEXT: 1 4 0.50 sqrdcmlah z31.h, z31.h, z31.h, #180
+# CHECK-NEXT: 1 4 0.50 sqrdcmlah z31.s, z30.s, z7.s[0], #180
+# CHECK-NEXT: 1 4 0.50 sqrdcmlah z31.s, z31.s, z31.s, #180
+# CHECK-NEXT: 1 4 0.50 sqrdmlah z0.b, z1.b, z31.b
+# CHECK-NEXT: 1 5 1.00 sqrdmlah z0.d, z1.d, z15.d[1]
+# CHECK-NEXT: 1 5 1.00 sqrdmlah z0.d, z1.d, z31.d
+# CHECK-NEXT: 1 4 0.50 sqrdmlah z0.h, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.50 sqrdmlah z0.h, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 0.50 sqrdmlah z0.s, z1.s, z31.s
+# CHECK-NEXT: 1 4 0.50 sqrdmlah z0.s, z1.s, z7.s[3]
+# CHECK-NEXT: 1 4 0.50 sqrdmlsh z0.b, z1.b, z31.b
+# CHECK-NEXT: 1 5 1.00 sqrdmlsh z0.d, z1.d, z15.d[1]
+# CHECK-NEXT: 1 5 1.00 sqrdmlsh z0.d, z1.d, z31.d
+# CHECK-NEXT: 1 4 0.50 sqrdmlsh z0.h, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.50 sqrdmlsh z0.h, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 0.50 sqrdmlsh z0.s, z1.s, z31.s
+# CHECK-NEXT: 1 4 0.50 sqrdmlsh z0.s, z1.s, z7.s[3]
+# CHECK-NEXT: 1 4 0.50 sqrdmulh z0.b, z1.b, z2.b
+# CHECK-NEXT: 2 5 1.00 sqrdmulh z0.d, z1.d, z15.d[1]
+# CHECK-NEXT: 1 4 0.50 sqrdmulh z0.h, z1.h, z2.h
+# CHECK-NEXT: 1 4 0.50 sqrdmulh z0.h, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 0.50 sqrdmulh z0.s, z1.s, z7.s[3]
+# CHECK-NEXT: 1 4 0.50 sqrdmulh z29.s, z30.s, z31.s
+# CHECK-NEXT: 2 5 1.00 sqrdmulh z31.d, z31.d, z31.d
+# CHECK-NEXT: 1 4 0.25 sqrshl z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 4 0.25 sqrshl z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 4 0.25 sqrshl z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 4 0.25 sqrshl z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 4 0.25 sqrshlr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 4 0.25 sqrshlr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 4 0.25 sqrshlr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 4 0.25 sqrshlr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 4 0.25 sqrshrnb z0.b, z0.h, #1
+# CHECK-NEXT: 1 4 0.25 sqrshrnb z0.h, z0.s, #1
+# CHECK-NEXT: 1 4 0.25 sqrshrnb z0.s, z0.d, #1
+# CHECK-NEXT: 1 4 0.25 sqrshrnb z31.b, z31.h, #8
+# CHECK-NEXT: 1 4 0.25 sqrshrnb z31.h, z31.s, #16
+# CHECK-NEXT: 1 4 0.25 sqrshrnb z31.s, z31.d, #32
+# CHECK-NEXT: 1 4 0.25 sqrshrnt z0.b, z0.h, #1
+# CHECK-NEXT: 1 4 0.25 sqrshrnt z0.h, z0.s, #1
+# CHECK-NEXT: 1 4 0.25 sqrshrnt z0.s, z0.d, #1
+# CHECK-NEXT: 1 4 0.25 sqrshrnt z31.b, z31.h, #8
+# CHECK-NEXT: 1 4 0.25 sqrshrnt z31.h, z31.s, #16
+# CHECK-NEXT: 1 4 0.25 sqrshrnt z31.s, z31.d, #32
+# CHECK-NEXT: 1 4 0.25 sqrshrunb z0.b, z0.h, #1
+# CHECK-NEXT: 1 4 0.25 sqrshrunb z0.h, z0.s, #1
+# CHECK-NEXT: 1 4 0.25 sqrshrunb z0.s, z0.d, #1
+# CHECK-NEXT: 1 4 0.25 sqrshrunb z31.b, z31.h, #8
+# CHECK-NEXT: 1 4 0.25 sqrshrunb z31.h, z31.s, #16
+# CHECK-NEXT: 1 4 0.25 sqrshrunb z31.s, z31.d, #32
+# CHECK-NEXT: 1 4 0.25 sqrshrunt z0.b, z0.h, #1
+# CHECK-NEXT: 1 4 0.25 sqrshrunt z0.h, z0.s, #1
+# CHECK-NEXT: 1 4 0.25 sqrshrunt z0.s, z0.d, #1
+# CHECK-NEXT: 1 4 0.25 sqrshrunt z31.b, z31.h, #8
+# CHECK-NEXT: 1 4 0.25 sqrshrunt z31.h, z31.s, #16
+# CHECK-NEXT: 1 4 0.25 sqrshrunt z31.s, z31.d, #32
+# CHECK-NEXT: 1 4 0.25 sqshl z0.b, p0/m, z0.b, #0
+# CHECK-NEXT: 1 4 0.25 sqshl z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 4 0.25 sqshl z0.d, p0/m, z0.d, #0
+# CHECK-NEXT: 1 4 0.25 sqshl z0.h, p0/m, z0.h, #0
+# CHECK-NEXT: 1 4 0.25 sqshl z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 4 0.25 sqshl z0.s, p0/m, z0.s, #0
+# CHECK-NEXT: 1 4 0.25 sqshl z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 4 0.25 sqshl z31.b, p0/m, z31.b, #7
+# CHECK-NEXT: 1 4 0.25 sqshl z31.d, p0/m, z31.d, #63
+# CHECK-NEXT: 1 4 0.25 sqshl z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 4 0.25 sqshl z31.h, p0/m, z31.h, #15
+# CHECK-NEXT: 1 4 0.25 sqshl z31.s, p0/m, z31.s, #31
+# CHECK-NEXT: 1 4 0.25 sqshlr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 4 0.25 sqshlr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 4 0.25 sqshlr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 4 0.25 sqshlr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 4 0.25 sqshlu z0.b, p0/m, z0.b, #0
+# CHECK-NEXT: 1 4 0.25 sqshlu z0.d, p0/m, z0.d, #0
+# CHECK-NEXT: 1 4 0.25 sqshlu z0.h, p0/m, z0.h, #0
+# CHECK-NEXT: 1 4 0.25 sqshlu z0.s, p0/m, z0.s, #0
+# CHECK-NEXT: 1 4 0.25 sqshlu z31.b, p0/m, z31.b, #7
+# CHECK-NEXT: 1 4 0.25 sqshlu z31.d, p0/m, z31.d, #63
+# CHECK-NEXT: 1 4 0.25 sqshlu z31.h, p0/m, z31.h, #15
+# CHECK-NEXT: 1 4 0.25 sqshlu z31.s, p0/m, z31.s, #31
+# CHECK-NEXT: 1 4 0.25 sqshrnb z0.b, z0.h, #1
+# CHECK-NEXT: 1 4 0.25 sqshrnb z0.h, z0.s, #1
+# CHECK-NEXT: 1 4 0.25 sqshrnb z0.s, z0.d, #1
+# CHECK-NEXT: 1 4 0.25 sqshrnb z31.b, z31.h, #8
+# CHECK-NEXT: 1 4 0.25 sqshrnb z31.h, z31.s, #16
+# CHECK-NEXT: 1 4 0.25 sqshrnb z31.s, z31.d, #32
+# CHECK-NEXT: 1 4 0.25 sqshrnt z0.b, z0.h, #1
+# CHECK-NEXT: 1 4 0.25 sqshrnt z0.h, z0.s, #1
+# CHECK-NEXT: 1 4 0.25 sqshrnt z0.s, z0.d, #1
+# CHECK-NEXT: 1 4 0.25 sqshrnt z31.b, z31.h, #8
+# CHECK-NEXT: 1 4 0.25 sqshrnt z31.h, z31.s, #16
+# CHECK-NEXT: 1 4 0.25 sqshrnt z31.s, z31.d, #32
+# CHECK-NEXT: 1 4 0.25 sqshrunb z0.b, z0.h, #1
+# CHECK-NEXT: 1 4 0.25 sqshrunb z0.h, z0.s, #1
+# CHECK-NEXT: 1 4 0.25 sqshrunb z0.s, z0.d, #1
+# CHECK-NEXT: 1 4 0.25 sqshrunb z31.b, z31.h, #8
+# CHECK-NEXT: 1 4 0.25 sqshrunb z31.h, z31.s, #16
+# CHECK-NEXT: 1 4 0.25 sqshrunb z31.s, z31.d, #32
+# CHECK-NEXT: 1 4 0.25 sqshrunt z0.b, z0.h, #1
+# CHECK-NEXT: 1 4 0.25 sqshrunt z0.h, z0.s, #1
+# CHECK-NEXT: 1 4 0.25 sqshrunt z0.s, z0.d, #1
+# CHECK-NEXT: 1 4 0.25 sqshrunt z31.b, z31.h, #8
+# CHECK-NEXT: 1 4 0.25 sqshrunt z31.h, z31.s, #16
+# CHECK-NEXT: 1 4 0.25 sqshrunt z31.s, z31.d, #32
+# CHECK-NEXT: 1 2 0.25 sqsub z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 2 0.25 sqsub z0.b, z0.b, #0
+# CHECK-NEXT: 1 2 0.25 sqsub z0.b, z0.b, z0.b
+# CHECK-NEXT: 1 2 0.25 sqsub z0.d, z0.d, #0
+# CHECK-NEXT: 1 2 0.25 sqsub z0.d, z0.d, #0, lsl #8
+# CHECK-NEXT: 1 2 0.25 sqsub z0.d, z0.d, z0.d
+# CHECK-NEXT: 1 2 0.25 sqsub z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.25 sqsub z0.h, z0.h, #0
+# CHECK-NEXT: 1 2 0.25 sqsub z0.h, z0.h, #0, lsl #8
+# CHECK-NEXT: 1 2 0.25 sqsub z0.h, z0.h, z0.h
+# CHECK-NEXT: 1 2 0.25 sqsub z0.s, z0.s, #0
+# CHECK-NEXT: 1 2 0.25 sqsub z0.s, z0.s, #0, lsl #8
+# CHECK-NEXT: 1 2 0.25 sqsub z0.s, z0.s, z0.s
+# CHECK-NEXT: 1 2 0.25 sqsub z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.25 sqsub z31.b, z31.b, #255
+# CHECK-NEXT: 1 2 0.25 sqsub z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 2 0.25 sqsub z31.d, z31.d, #65280
+# CHECK-NEXT: 1 2 0.25 sqsub z31.h, z31.h, #65280
+# CHECK-NEXT: 1 2 0.25 sqsub z31.s, z31.s, #65280
+# CHECK-NEXT: 1 2 0.25 sqsubr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 2 0.25 sqsubr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.25 sqsubr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.25 sqsubr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 4 0.25 sqxtnb z0.b, z31.h
+# CHECK-NEXT: 1 4 0.25 sqxtnb z0.h, z31.s
+# CHECK-NEXT: 1 4 0.25 sqxtnb z0.s, z31.d
+# CHECK-NEXT: 1 4 0.25 sqxtnt z0.b, z31.h
+# CHECK-NEXT: 1 4 0.25 sqxtnt z0.h, z31.s
+# CHECK-NEXT: 1 4 0.25 sqxtnt z0.s, z31.d
+# CHECK-NEXT: 1 4 0.25 sqxtunb z0.b, z31.h
+# CHECK-NEXT: 1 4 0.25 sqxtunb z0.h, z31.s
+# CHECK-NEXT: 1 4 0.25 sqxtunb z0.s, z31.d
+# CHECK-NEXT: 1 4 0.25 sqxtunt z0.b, z31.h
+# CHECK-NEXT: 1 4 0.25 sqxtunt z0.h, z31.s
+# CHECK-NEXT: 1 4 0.25 sqxtunt z0.s, z31.d
+# CHECK-NEXT: 1 2 0.25 srhadd z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 2 0.25 srhadd z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.25 srhadd z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.25 srhadd z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 2 0.25 sri z0.b, z0.b, #1
+# CHECK-NEXT: 1 2 0.25 sri z0.d, z0.d, #1
+# CHECK-NEXT: 1 2 0.25 sri z0.h, z0.h, #1
+# CHECK-NEXT: 1 2 0.25 sri z0.s, z0.s, #1
+# CHECK-NEXT: 1 2 0.25 sri z31.b, z31.b, #8
+# CHECK-NEXT: 1 2 0.25 sri z31.d, z31.d, #64
+# CHECK-NEXT: 1 2 0.25 sri z31.h, z31.h, #16
+# CHECK-NEXT: 1 2 0.25 sri z31.s, z31.s, #32
+# CHECK-NEXT: 1 4 0.25 srshl z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 4 0.25 srshl z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 4 0.25 srshl z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 4 0.25 srshl z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 4 0.25 srshlr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 4 0.25 srshlr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 4 0.25 srshlr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 4 0.25 srshlr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 4 0.25 srshr z0.b, p0/m, z0.b, #1
+# CHECK-NEXT: 1 4 0.25 srshr z0.d, p0/m, z0.d, #1
+# CHECK-NEXT: 1 4 0.25 srshr z0.h, p0/m, z0.h, #1
+# CHECK-NEXT: 1 4 0.25 srshr z0.s, p0/m, z0.s, #1
+# CHECK-NEXT: 1 4 0.25 srshr z31.b, p0/m, z31.b, #8
+# CHECK-NEXT: 1 4 0.25 srshr z31.d, p0/m, z31.d, #64
+# CHECK-NEXT: 1 4 0.25 srshr z31.h, p0/m, z31.h, #16
+# CHECK-NEXT: 1 4 0.25 srshr z31.s, p0/m, z31.s, #32
+# CHECK-NEXT: 1 4 0.50 srsra z0.b, z0.b, #1
+# CHECK-NEXT: 1 4 0.50 srsra z0.d, z0.d, #1
+# CHECK-NEXT: 1 4 0.50 srsra z0.h, z0.h, #1
+# CHECK-NEXT: 1 4 0.50 srsra z0.s, z0.s, #1
+# CHECK-NEXT: 1 4 0.50 srsra z31.b, z31.b, #8
+# CHECK-NEXT: 1 4 0.50 srsra z31.d, z31.d, #64
+# CHECK-NEXT: 1 4 0.50 srsra z31.h, z31.h, #16
+# CHECK-NEXT: 1 4 0.50 srsra z31.s, z31.s, #32
+# CHECK-NEXT: 1 2 0.25 sshllb z0.d, z0.s, #0
+# CHECK-NEXT: 1 2 0.25 sshllb z0.h, z0.b, #0
+# CHECK-NEXT: 1 2 0.25 sshllb z0.s, z0.h, #0
+# CHECK-NEXT: 1 2 0.25 sshllb z31.d, z31.s, #31
+# CHECK-NEXT: 1 2 0.25 sshllb z31.h, z31.b, #7
+# CHECK-NEXT: 1 2 0.25 sshllb z31.s, z31.h, #15
+# CHECK-NEXT: 1 2 0.25 sshllt z0.d, z0.s, #0
+# CHECK-NEXT: 1 2 0.25 sshllt z0.h, z0.b, #0
+# CHECK-NEXT: 1 2 0.25 sshllt z0.s, z0.h, #0
+# CHECK-NEXT: 1 2 0.25 sshllt z31.d, z31.s, #31
+# CHECK-NEXT: 1 2 0.25 sshllt z31.h, z31.b, #7
+# CHECK-NEXT: 1 2 0.25 sshllt z31.s, z31.h, #15
+# CHECK-NEXT: 1 4 0.50 ssra z0.b, z0.b, #1
+# CHECK-NEXT: 1 4 0.50 ssra z0.d, z0.d, #1
+# CHECK-NEXT: 1 4 0.50 ssra z0.h, z0.h, #1
+# CHECK-NEXT: 1 4 0.50 ssra z0.s, z0.s, #1
+# CHECK-NEXT: 1 4 0.50 ssra z31.b, z31.b, #8
+# CHECK-NEXT: 1 4 0.50 ssra z31.d, z31.d, #64
+# CHECK-NEXT: 1 4 0.50 ssra z31.h, z31.h, #16
+# CHECK-NEXT: 1 4 0.50 ssra z31.s, z31.s, #32
+# CHECK-NEXT: 1 2 0.25 ssublb z0.h, z1.b, z2.b
+# CHECK-NEXT: 1 2 0.25 ssublb z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 2 0.25 ssublb z31.d, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.25 ssublbt z0.d, z1.s, z31.s
+# CHECK-NEXT: 1 2 0.25 ssublbt z0.h, z1.b, z31.b
+# CHECK-NEXT: 1 2 0.25 ssublbt z0.s, z1.h, z31.h
+# CHECK-NEXT: 1 2 0.25 ssublt z0.h, z1.b, z2.b
+# CHECK-NEXT: 1 2 0.25 ssublt z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 2 0.25 ssublt z31.d, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.25 ssubltb z0.d, z1.s, z31.s
+# CHECK-NEXT: 1 2 0.25 ssubltb z0.h, z1.b, z31.b
+# CHECK-NEXT: 1 2 0.25 ssubltb z0.s, z1.h, z31.h
+# CHECK-NEXT: 1 2 0.25 ssubwb z0.h, z1.h, z2.b
+# CHECK-NEXT: 1 2 0.25 ssubwb z29.s, z30.s, z31.h
+# CHECK-NEXT: 1 2 0.25 ssubwb z31.d, z31.d, z31.s
+# CHECK-NEXT: 1 2 0.25 ssubwt z0.h, z1.h, z2.b
+# CHECK-NEXT: 1 2 0.25 ssubwt z29.s, z30.s, z31.h
+# CHECK-NEXT: 1 2 0.25 ssubwt z31.d, z31.d, z31.s
+# CHECK-NEXT: 2 2 0.50 * st1b { z0.b }, p0, [x0, x0]
+# CHECK-NEXT: 2 2 0.50 * st1b { z0.b }, p0, [x0]
+# CHECK-NEXT: 2 2 0.50 * st1b { z0.d }, p0, [x0, x0]
+# CHECK-NEXT: 6 2 1.50 * st1b { z0.d }, p0, [x0, z0.d, sxtw]
+# CHECK-NEXT: 6 2 1.50 * st1b { z0.d }, p0, [x0, z0.d, uxtw]
+# CHECK-NEXT: 6 2 1.50 * st1b { z0.d }, p0, [x0, z0.d]
+# CHECK-NEXT: 2 2 0.50 * st1b { z0.d }, p0, [x0]
+# CHECK-NEXT: 6 2 1.50 * st1b { z0.d }, p7, [z0.d]
+# CHECK-NEXT: 2 2 0.50 * st1b { z0.h }, p0, [x0, x0]
+# CHECK-NEXT: 2 2 0.50 * st1b { z0.h }, p0, [x0]
+# CHECK-NEXT: 2 2 0.50 * st1b { z0.s }, p0, [x0, x0]
+# CHECK-NEXT: 12 4 3.00 * st1b { z0.s }, p0, [x0, z0.s, sxtw]
+# CHECK-NEXT: 12 4 3.00 * st1b { z0.s }, p0, [x0, z0.s, uxtw]
+# CHECK-NEXT: 2 2 0.50 * st1b { z0.s }, p0, [x0]
+# CHECK-NEXT: 12 4 3.00 * st1b { z0.s }, p7, [z0.s]
+# CHECK-NEXT: 2 2 0.50 * st1b { z21.b }, p5, [x10, #5, mul vl]
+# CHECK-NEXT: 2 2 0.50 * st1b { z21.d }, p5, [x10, #5, mul vl]
+# CHECK-NEXT: 2 2 0.50 * st1b { z21.h }, p5, [x10, #5, mul vl]
+# CHECK-NEXT: 2 2 0.50 * st1b { z21.s }, p5, [x10, #5, mul vl]
+# CHECK-NEXT: 2 2 0.50 * st1b { z31.b }, p7, [sp, #-1, mul vl]
+# CHECK-NEXT: 2 2 0.50 * st1b { z31.d }, p7, [sp, #-1, mul vl]
+# CHECK-NEXT: 6 2 1.50 * st1b { z31.d }, p7, [z31.d, #31]
+# CHECK-NEXT: 2 2 0.50 * st1b { z31.h }, p7, [sp, #-1, mul vl]
+# CHECK-NEXT: 2 2 0.50 * st1b { z31.s }, p7, [sp, #-1, mul vl]
+# CHECK-NEXT: 12 4 3.00 * st1b { z31.s }, p7, [z31.s, #31]
+# CHECK-NEXT: 2 2 0.50 * st1d { z0.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT: 6 2 1.50 * st1d { z0.d }, p0, [x0, z0.d, lsl #3]
+# CHECK-NEXT: 6 2 1.50 * st1d { z0.d }, p0, [x0, z0.d, sxtw #3]
+# CHECK-NEXT: 6 2 1.50 * st1d { z0.d }, p0, [x0, z0.d, sxtw]
+# CHECK-NEXT: 6 2 1.50 * st1d { z0.d }, p0, [x0, z0.d, uxtw #3]
+# CHECK-NEXT: 6 2 1.50 * st1d { z0.d }, p0, [x0, z0.d, uxtw]
+# CHECK-NEXT: 6 2 1.50 * st1d { z0.d }, p0, [x0, z0.d]
+# CHECK-NEXT: 2 2 0.50 * st1d { z0.d }, p0, [x0]
+# CHECK-NEXT: 6 2 1.50 * st1d { z0.d }, p7, [z0.d]
+# CHECK-NEXT: 2 2 0.50 * st1d { z21.d }, p5, [x10, #5, mul vl]
+# CHECK-NEXT: 2 2 0.50 * st1d { z31.d }, p7, [sp, #-1, mul vl]
+# CHECK-NEXT: 6 2 1.50 * st1d { z31.d }, p7, [z31.d, #248]
+# CHECK-NEXT: 3 2 0.50 * st1h { z0.d }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: 6 2 1.50 * st1h { z0.d }, p0, [x0, z0.d, lsl #1]
+# CHECK-NEXT: 6 2 1.50 * st1h { z0.d }, p0, [x0, z0.d, sxtw #1]
+# CHECK-NEXT: 6 2 1.50 * st1h { z0.d }, p0, [x0, z0.d, sxtw]
+# CHECK-NEXT: 6 2 1.50 * st1h { z0.d }, p0, [x0, z0.d, uxtw #1]
+# CHECK-NEXT: 6 2 1.50 * st1h { z0.d }, p0, [x0, z0.d, uxtw]
+# CHECK-NEXT: 6 2 1.50 * st1h { z0.d }, p0, [x0, z0.d]
+# CHECK-NEXT: 2 2 0.50 * st1h { z0.d }, p0, [x0]
+# CHECK-NEXT: 6 2 1.50 * st1h { z0.d }, p7, [z0.d]
+# CHECK-NEXT: 3 2 0.50 * st1h { z0.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: 2 2 0.50 * st1h { z0.h }, p0, [x0]
+# CHECK-NEXT: 3 2 0.50 * st1h { z0.s }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: 12 4 3.00 * st1h { z0.s }, p0, [x0, z0.s, sxtw #1]
+# CHECK-NEXT: 12 4 3.00 * st1h { z0.s }, p0, [x0, z0.s, sxtw]
+# CHECK-NEXT: 12 4 3.00 * st1h { z0.s }, p0, [x0, z0.s, uxtw #1]
+# CHECK-NEXT: 12 4 3.00 * st1h { z0.s }, p0, [x0, z0.s, uxtw]
+# CHECK-NEXT: 2 2 0.50 * st1h { z0.s }, p0, [x0]
+# CHECK-NEXT: 12 4 3.00 * st1h { z0.s }, p7, [z0.s]
+# CHECK-NEXT: 2 2 0.50 * st1h { z21.d }, p5, [x10, #5, mul vl]
+# CHECK-NEXT: 2 2 0.50 * st1h { z21.h }, p5, [x10, #5, mul vl]
+# CHECK-NEXT: 2 2 0.50 * st1h { z21.s }, p5, [x10, #5, mul vl]
+# CHECK-NEXT: 2 2 0.50 * st1h { z31.d }, p7, [sp, #-1, mul vl]
+# CHECK-NEXT: 6 2 1.50 * st1h { z31.d }, p7, [z31.d, #62]
+# CHECK-NEXT: 2 2 0.50 * st1h { z31.h }, p7, [sp, #-1, mul vl]
+# CHECK-NEXT: 2 2 0.50 * st1h { z31.s }, p7, [sp, #-1, mul vl]
+# CHECK-NEXT: 12 4 3.00 * st1h { z31.s }, p7, [z31.s, #62]
+# CHECK-NEXT: 2 2 0.50 * st1w { z0.d }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: 6 2 1.50 * st1w { z0.d }, p0, [x0, z0.d, lsl #2]
+# CHECK-NEXT: 6 2 1.50 * st1w { z0.d }, p0, [x0, z0.d, sxtw #2]
+# CHECK-NEXT: 6 2 1.50 * st1w { z0.d }, p0, [x0, z0.d, sxtw]
+# CHECK-NEXT: 6 2 1.50 * st1w { z0.d }, p0, [x0, z0.d, uxtw #2]
+# CHECK-NEXT: 6 2 1.50 * st1w { z0.d }, p0, [x0, z0.d, uxtw]
+# CHECK-NEXT: 6 2 1.50 * st1w { z0.d }, p0, [x0, z0.d]
+# CHECK-NEXT: 2 2 0.50 * st1w { z0.d }, p0, [x0]
+# CHECK-NEXT: 6 2 1.50 * st1w { z0.d }, p7, [z0.d]
+# CHECK-NEXT: 2 2 0.50 * st1w { z0.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: 12 4 3.00 * st1w { z0.s }, p0, [x0, z0.s, sxtw #2]
+# CHECK-NEXT: 12 4 3.00 * st1w { z0.s }, p0, [x0, z0.s, sxtw]
+# CHECK-NEXT: 12 4 3.00 * st1w { z0.s }, p0, [x0, z0.s, uxtw #2]
+# CHECK-NEXT: 12 4 3.00 * st1w { z0.s }, p0, [x0, z0.s, uxtw]
+# CHECK-NEXT: 2 2 0.50 * st1w { z0.s }, p0, [x0]
+# CHECK-NEXT: 12 4 3.00 * st1w { z0.s }, p7, [z0.s]
+# CHECK-NEXT: 2 2 0.50 * st1w { z21.d }, p5, [x10, #5, mul vl]
+# CHECK-NEXT: 2 2 0.50 * st1w { z21.s }, p5, [x10, #5, mul vl]
+# CHECK-NEXT: 2 2 0.50 * st1w { z31.d }, p7, [sp, #-1, mul vl]
+# CHECK-NEXT: 6 2 1.50 * st1w { z31.d }, p7, [z31.d, #124]
+# CHECK-NEXT: 2 2 0.50 * st1w { z31.s }, p7, [sp, #-1, mul vl]
+# CHECK-NEXT: 12 4 3.00 * st1w { z31.s }, p7, [z31.s, #124]
+# CHECK-NEXT: 4 4 1.00 * st2b { z0.b, z1.b }, p0, [x0, x0]
+# CHECK-NEXT: 2 4 0.50 * st2b { z0.b, z1.b }, p0, [x0]
+# CHECK-NEXT: 2 4 0.50 * st2b { z21.b, z22.b }, p5, [x10, #10, mul vl]
+# CHECK-NEXT: 2 4 0.50 * st2b { z23.b, z24.b }, p3, [x13, #-16, mul vl]
+# CHECK-NEXT: 4 4 1.00 * st2b { z5.b, z6.b }, p3, [x17, x16]
+# CHECK-NEXT: 4 4 1.00 * st2d { z0.d, z1.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT: 2 4 0.50 * st2d { z0.d, z1.d }, p0, [x0]
+# CHECK-NEXT: 2 4 0.50 * st2d { z21.d, z22.d }, p5, [x10, #10, mul vl]
+# CHECK-NEXT: 2 4 0.50 * st2d { z23.d, z24.d }, p3, [x13, #-16, mul vl]
+# CHECK-NEXT: 4 4 1.00 * st2d { z5.d, z6.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT: 6 4 1.00 * st2h { z0.h, z1.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: 2 4 0.50 * st2h { z0.h, z1.h }, p0, [x0]
+# CHECK-NEXT: 2 4 0.50 * st2h { z21.h, z22.h }, p5, [x10, #10, mul vl]
+# CHECK-NEXT: 2 4 0.50 * st2h { z23.h, z24.h }, p3, [x13, #-16, mul vl]
+# CHECK-NEXT: 6 4 1.00 * st2h { z5.h, z6.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT: 4 4 1.00 * st2w { z0.s, z1.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: 2 4 0.50 * st2w { z0.s, z1.s }, p0, [x0]
+# CHECK-NEXT: 2 4 0.50 * st2w { z21.s, z22.s }, p5, [x10, #10, mul vl]
+# CHECK-NEXT: 2 4 0.50 * st2w { z23.s, z24.s }, p3, [x13, #-16, mul vl]
+# CHECK-NEXT: 4 4 1.00 * st2w { z5.s, z6.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT: 27 7 4.50 * st3b { z0.b - z2.b }, p0, [x0, x0]
+# CHECK-NEXT: 18 7 4.50 * st3b { z0.b - z2.b }, p0, [x0]
+# CHECK-NEXT: 18 7 4.50 * st3b { z21.b - z23.b }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: 18 7 4.50 * st3b { z23.b - z25.b }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: 27 7 4.50 * st3b { z5.b - z7.b }, p3, [x17, x16]
+# CHECK-NEXT: 27 7 4.50 * st3d { z0.d - z2.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT: 18 7 4.50 * st3d { z0.d - z2.d }, p0, [x0]
+# CHECK-NEXT: 18 7 4.50 * st3d { z21.d - z23.d }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: 18 7 4.50 * st3d { z23.d - z25.d }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: 27 7 4.50 * st3d { z5.d - z7.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT: 27 7 4.50 * st3h { z0.h - z2.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: 18 7 4.50 * st3h { z0.h - z2.h }, p0, [x0]
+# CHECK-NEXT: 18 7 4.50 * st3h { z21.h - z23.h }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: 18 7 4.50 * st3h { z23.h - z25.h }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: 27 7 4.50 * st3h { z5.h - z7.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT: 27 7 4.50 * st3w { z0.s - z2.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: 18 7 4.50 * st3w { z0.s - z2.s }, p0, [x0]
+# CHECK-NEXT: 18 7 4.50 * st3w { z21.s - z23.s }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: 18 7 4.50 * st3w { z23.s - z25.s }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: 27 7 4.50 * st3w { z5.s - z7.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT: 54 11 9.00 * st4b { z0.b - z3.b }, p0, [x0, x0]
+# CHECK-NEXT: 36 11 9.00 * st4b { z0.b - z3.b }, p0, [x0]
+# CHECK-NEXT: 36 11 9.00 * st4b { z21.b - z24.b }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: 36 11 9.00 * st4b { z23.b - z26.b }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: 54 11 9.00 * st4b { z5.b - z8.b }, p3, [x17, x16]
+# CHECK-NEXT: 54 11 9.00 * st4d { z0.d - z3.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT: 36 11 9.00 * st4d { z0.d - z3.d }, p0, [x0]
+# CHECK-NEXT: 36 11 9.00 * st4d { z21.d - z24.d }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: 36 11 9.00 * st4d { z23.d - z26.d }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: 54 11 9.00 * st4d { z5.d - z8.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT: 54 11 9.00 * st4h { z0.h - z3.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: 36 11 9.00 * st4h { z0.h - z3.h }, p0, [x0]
+# CHECK-NEXT: 36 11 9.00 * st4h { z21.h - z24.h }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: 36 11 9.00 * st4h { z23.h - z26.h }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: 54 11 9.00 * st4h { z5.h - z8.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT: 54 11 9.00 * st4w { z0.s - z3.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: 36 11 9.00 * st4w { z0.s - z3.s }, p0, [x0]
+# CHECK-NEXT: 36 11 9.00 * st4w { z21.s - z24.s }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: 36 11 9.00 * st4w { z23.s - z26.s }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: 54 11 9.00 * st4w { z5.s - z8.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT: 2 2 0.50 * stnt1b { z0.b }, p0, [x0, x0]
+# CHECK-NEXT: 2 2 0.50 * stnt1b { z0.b }, p0, [x0]
+# CHECK-NEXT: 6 2 1.50 * stnt1b { z0.d }, p0, [z1.d]
+# CHECK-NEXT: 12 4 3.00 * stnt1b { z0.s }, p0, [z1.s]
+# CHECK-NEXT: 2 2 0.50 * stnt1b { z21.b }, p5, [x10, #7, mul vl]
+# CHECK-NEXT: 2 2 0.50 * stnt1b { z23.b }, p3, [x13, #-8, mul vl]
+# CHECK-NEXT: 6 2 1.50 * stnt1b { z31.d }, p7, [z31.d, x0]
+# CHECK-NEXT: 6 2 1.50 * stnt1b { z31.d }, p7, [z31.d]
+# CHECK-NEXT: 12 4 3.00 * stnt1b { z31.s }, p7, [z31.s, x0]
+# CHECK-NEXT: 12 4 3.00 * stnt1b { z31.s }, p7, [z31.s]
+# CHECK-NEXT: 2 2 0.50 * stnt1d { z0.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT: 2 2 0.50 * stnt1d { z0.d }, p0, [x0]
+# CHECK-NEXT: 6 2 1.50 * stnt1d { z0.d }, p0, [z1.d]
+# CHECK-NEXT: 2 2 0.50 * stnt1d { z21.d }, p5, [x10, #7, mul vl]
+# CHECK-NEXT: 2 2 0.50 * stnt1d { z23.d }, p3, [x13, #-8, mul vl]
+# CHECK-NEXT: 6 2 1.50 * stnt1d { z31.d }, p7, [z31.d, x0]
+# CHECK-NEXT: 6 2 1.50 * stnt1d { z31.d }, p7, [z31.d]
+# CHECK-NEXT: 6 2 1.50 * stnt1h { z0.d }, p0, [z1.d]
+# CHECK-NEXT: 3 2 0.50 * stnt1h { z0.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: 2 2 0.50 * stnt1h { z0.h }, p0, [x0]
+# CHECK-NEXT: 12 4 3.00 * stnt1h { z0.s }, p0, [z1.s]
+# CHECK-NEXT: 2 2 0.50 * stnt1h { z21.h }, p5, [x10, #7, mul vl]
+# CHECK-NEXT: 2 2 0.50 * stnt1h { z23.h }, p3, [x13, #-8, mul vl]
+# CHECK-NEXT: 6 2 1.50 * stnt1h { z31.d }, p7, [z31.d, x0]
+# CHECK-NEXT: 6 2 1.50 * stnt1h { z31.d }, p7, [z31.d]
+# CHECK-NEXT: 12 4 3.00 * stnt1h { z31.s }, p7, [z31.s, x0]
+# CHECK-NEXT: 12 4 3.00 * stnt1h { z31.s }, p7, [z31.s]
+# CHECK-NEXT: 6 2 1.50 * stnt1w { z0.d }, p0, [z1.d]
+# CHECK-NEXT: 2 2 0.50 * stnt1w { z0.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: 2 2 0.50 * stnt1w { z0.s }, p0, [x0]
+# CHECK-NEXT: 12 4 3.00 * stnt1w { z0.s }, p0, [z1.s]
+# CHECK-NEXT: 2 2 0.50 * stnt1w { z21.s }, p5, [x10, #7, mul vl]
+# CHECK-NEXT: 2 2 0.50 * stnt1w { z23.s }, p3, [x13, #-8, mul vl]
+# CHECK-NEXT: 6 2 1.50 * stnt1w { z31.d }, p7, [z31.d, x0]
+# CHECK-NEXT: 6 2 1.50 * stnt1w { z31.d }, p7, [z31.d]
+# CHECK-NEXT: 12 4 3.00 * stnt1w { z31.s }, p7, [z31.s, x0]
+# CHECK-NEXT: 12 4 3.00 * stnt1w { z31.s }, p7, [z31.s]
+# CHECK-NEXT: 1 1 0.50 * str p0, [x0]
+# CHECK-NEXT: 1 1 0.50 * str p15, [sp, #-256, mul vl]
+# CHECK-NEXT: 1 1 0.50 * str p5, [x10, #255, mul vl]
+# CHECK-NEXT: 2 2 0.50 * str z0, [x0]
+# CHECK-NEXT: 2 2 0.50 * str z21, [x10, #-256, mul vl]
+# CHECK-NEXT: 2 2 0.50 * str z31, [sp, #255, mul vl]
+# CHECK-NEXT: 1 2 0.25 sub z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: 1 2 0.25 sub z0.b, z0.b, #0
+# CHECK-NEXT: 1 2 0.25 sub z0.b, z0.b, z0.b
+# CHECK-NEXT: 1 2 0.25 sub z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: 1 2 0.25 sub z0.d, z0.d, #0
+# CHECK-NEXT: 1 2 0.25 sub z0.d, z0.d, #0, lsl #8
+# CHECK-NEXT: 1 2 0.25 sub z0.d, z0.d, z0.d
+# CHECK-NEXT: 1 2 0.25 sub z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: 1 2 0.25 sub z0.h, z0.h, #0
+# CHECK-NEXT: 1 2 0.25 sub z0.h, z0.h, #0, lsl #8
+# CHECK-NEXT: 1 2 0.25 sub z0.h, z0.h, z0.h
+# CHECK-NEXT: 1 2 0.25 sub z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: 1 2 0.25 sub z0.s, z0.s, #0
+# CHECK-NEXT: 1 2 0.25 sub z0.s, z0.s, #0, lsl #8
+# CHECK-NEXT: 1 2 0.25 sub z0.s, z0.s, z0.s
+# CHECK-NEXT: 1 2 0.25 sub z21.b, p5/m, z21.b, z10.b
+# CHECK-NEXT: 1 2 0.25 sub z21.b, z10.b, z21.b
+# CHECK-NEXT: 1 2 0.25 sub z21.d, p5/m, z21.d, z10.d
+# CHECK-NEXT: 1 2 0.25 sub z21.d, z10.d, z21.d
+# CHECK-NEXT: 1 2 0.25 sub z21.h, p5/m, z21.h, z10.h
+# CHECK-NEXT: 1 2 0.25 sub z21.h, z10.h, z21.h
+# CHECK-NEXT: 1 2 0.25 sub z21.s, p5/m, z21.s, z10.s
+# CHECK-NEXT: 1 2 0.25 sub z21.s, z10.s, z21.s
+# CHECK-NEXT: 1 2 0.25 sub z23.b, p3/m, z23.b, z13.b
+# CHECK-NEXT: 1 2 0.25 sub z23.b, z13.b, z8.b
+# CHECK-NEXT: 1 2 0.25 sub z23.d, p3/m, z23.d, z13.d
+# CHECK-NEXT: 1 2 0.25 sub z23.d, z13.d, z8.d
+# CHECK-NEXT: 1 2 0.25 sub z23.h, p3/m, z23.h, z13.h
+# CHECK-NEXT: 1 2 0.25 sub z23.h, z13.h, z8.h
+# CHECK-NEXT: 1 2 0.25 sub z23.s, p3/m, z23.s, z13.s
+# CHECK-NEXT: 1 2 0.25 sub z23.s, z13.s, z8.s
+# CHECK-NEXT: 1 2 0.25 sub z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: 1 2 0.25 sub z31.b, z31.b, #255
+# CHECK-NEXT: 1 2 0.25 sub z31.b, z31.b, z31.b
+# CHECK-NEXT: 1 2 0.25 sub z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: 1 2 0.25 sub z31.d, z31.d, #65280
+# CHECK-NEXT: 1 2 0.25 sub z31.d, z31.d, z31.d
+# CHECK-NEXT: 1 2 0.25 sub z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: 1 2 0.25 sub z31.h, z31.h, #65280
+# CHECK-NEXT: 1 2 0.25 sub z31.h, z31.h, z31.h
+# CHECK-NEXT: 1 2 0.25 sub z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.25 sub z31.s, z31.s, #65280
+# CHECK-NEXT: 1 2 0.25 sub z31.s, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.25 subhnb z0.b, z1.h, z31.h
+# CHECK-NEXT: 1 2 0.25 subhnb z0.h, z1.s, z31.s
+# CHECK-NEXT: 1 2 0.25 subhnb z0.s, z1.d, z31.d
+# CHECK-NEXT: 1 2 0.25 subhnt z0.b, z1.h, z31.h
+# CHECK-NEXT: 1 2 0.25 subhnt z0.h, z1.s, z31.s
+# CHECK-NEXT: 1 2 0.25 subhnt z0.s, z1.d, z31.d
+# CHECK-NEXT: 1 2 0.25 subr z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: 1 2 0.25 subr z0.b, z0.b, #0
+# CHECK-NEXT: 1 2 0.25 subr z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: 1 2 0.25 subr z0.d, z0.d, #0
+# CHECK-NEXT: 1 2 0.25 subr z0.d, z0.d, #0, lsl #8
+# CHECK-NEXT: 1 2 0.25 subr z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: 1 2 0.25 subr z0.h, z0.h, #0
+# CHECK-NEXT: 1 2 0.25 subr z0.h, z0.h, #0, lsl #8
+# CHECK-NEXT: 1 2 0.25 subr z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: 1 2 0.25 subr z0.s, z0.s, #0
+# CHECK-NEXT: 1 2 0.25 subr z0.s, z0.s, #0, lsl #8
+# CHECK-NEXT: 1 2 0.25 subr z31.b, z31.b, #255
+# CHECK-NEXT: 1 2 0.25 subr z31.d, z31.d, #65280
+# CHECK-NEXT: 1 2 0.25 subr z31.h, z31.h, #65280
+# CHECK-NEXT: 1 2 0.25 subr z31.s, z31.s, #65280
+# CHECK-NEXT: 1 2 0.25 sunpkhi z31.d, z31.s
+# CHECK-NEXT: 1 2 0.25 sunpkhi z31.h, z31.b
+# CHECK-NEXT: 1 2 0.25 sunpkhi z31.s, z31.h
+# CHECK-NEXT: 1 2 0.25 sunpklo z31.d, z31.s
+# CHECK-NEXT: 1 2 0.25 sunpklo z31.h, z31.b
+# CHECK-NEXT: 1 2 0.25 sunpklo z31.s, z31.h
+# CHECK-NEXT: 1 2 0.25 suqadd z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 2 0.25 suqadd z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.25 suqadd z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.25 suqadd z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 2 0.25 sxtb z0.d, p0/m, z0.d
+# CHECK-NEXT: 1 2 0.25 sxtb z0.h, p0/m, z0.h
+# CHECK-NEXT: 1 2 0.25 sxtb z0.s, p0/m, z0.s
+# CHECK-NEXT: 1 2 0.25 sxtb z31.d, p7/m, z31.d
+# CHECK-NEXT: 1 2 0.25 sxtb z31.h, p7/m, z31.h
+# CHECK-NEXT: 1 2 0.25 sxtb z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 2 0.25 sxth z0.d, p0/m, z0.d
+# CHECK-NEXT: 1 2 0.25 sxth z0.s, p0/m, z0.s
+# CHECK-NEXT: 1 2 0.25 sxth z31.d, p7/m, z31.d
+# CHECK-NEXT: 1 2 0.25 sxth z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 2 0.25 sxtw z0.d, p0/m, z0.d
+# CHECK-NEXT: 1 2 0.25 sxtw z31.d, p7/m, z31.d
+# CHECK-NEXT: 1 2 0.25 tbl z28.b, { z29.b, z30.b }, z31.b
+# CHECK-NEXT: 1 2 0.25 tbl z28.d, { z29.d, z30.d }, z31.d
+# CHECK-NEXT: 1 2 0.25 tbl z28.h, { z29.h, z30.h }, z31.h
+# CHECK-NEXT: 1 2 0.25 tbl z28.s, { z29.s, z30.s }, z31.s
+# CHECK-NEXT: 1 2 0.25 tbl z31.b, { z31.b }, z31.b
+# CHECK-NEXT: 1 2 0.25 tbl z31.d, { z31.d }, z31.d
+# CHECK-NEXT: 1 2 0.25 tbl z31.h, { z31.h }, z31.h
+# CHECK-NEXT: 1 2 0.25 tbl z31.s, { z31.s }, z31.s
+# CHECK-NEXT: 1 2 0.25 tbx z31.b, z31.b, z31.b
+# CHECK-NEXT: 1 2 0.25 tbx z31.d, z31.d, z31.d
+# CHECK-NEXT: 1 2 0.25 tbx z31.h, z31.h, z31.h
+# CHECK-NEXT: 1 2 0.25 tbx z31.s, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.50 trn1 p15.b, p15.b, p15.b
+# CHECK-NEXT: 1 2 0.50 trn1 p15.d, p15.d, p15.d
+# CHECK-NEXT: 1 2 0.50 trn1 p15.h, p15.h, p15.h
+# CHECK-NEXT: 1 2 0.50 trn1 p15.s, p15.s, p15.s
+# CHECK-NEXT: 1 2 0.25 trn1 z31.b, z31.b, z31.b
+# CHECK-NEXT: 1 2 0.25 trn1 z31.d, z31.d, z31.d
+# CHECK-NEXT: 1 2 0.25 trn1 z31.h, z31.h, z31.h
+# CHECK-NEXT: 1 2 0.25 trn1 z31.s, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.50 trn2 p15.b, p15.b, p15.b
+# CHECK-NEXT: 1 2 0.50 trn2 p15.d, p15.d, p15.d
+# CHECK-NEXT: 1 2 0.50 trn2 p15.h, p15.h, p15.h
+# CHECK-NEXT: 1 2 0.50 trn2 p15.s, p15.s, p15.s
+# CHECK-NEXT: 1 2 0.25 trn2 z31.b, z31.b, z31.b
+# CHECK-NEXT: 1 2 0.25 trn2 z31.d, z31.d, z31.d
+# CHECK-NEXT: 1 2 0.25 trn2 z31.h, z31.h, z31.h
+# CHECK-NEXT: 1 2 0.25 trn2 z31.s, z31.s, z31.s
+# CHECK-NEXT: 1 4 0.25 uaba z0.b, z1.b, z31.b
+# CHECK-NEXT: 1 4 0.25 uaba z0.d, z1.d, z31.d
+# CHECK-NEXT: 1 4 0.25 uaba z0.h, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.25 uaba z0.s, z1.s, z31.s
+# CHECK-NEXT: 1 4 0.25 uabalb z0.d, z1.s, z31.s
+# CHECK-NEXT: 1 4 0.25 uabalb z0.h, z1.b, z31.b
+# CHECK-NEXT: 1 4 0.25 uabalb z0.s, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.25 uabalt z0.d, z1.s, z31.s
+# CHECK-NEXT: 1 4 0.25 uabalt z0.h, z1.b, z31.b
+# CHECK-NEXT: 1 4 0.25 uabalt z0.s, z1.h, z31.h
+# CHECK-NEXT: 1 2 0.25 uabd z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: 1 2 0.25 uabd z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: 1 2 0.25 uabd z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: 1 2 0.25 uabd z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.25 uabdlb z0.h, z1.b, z2.b
+# CHECK-NEXT: 1 2 0.25 uabdlb z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 2 0.25 uabdlb z31.d, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.25 uabdlt z0.h, z1.b, z2.b
+# CHECK-NEXT: 1 2 0.25 uabdlt z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 2 0.25 uabdlt z31.d, z31.s, z31.s
+# CHECK-NEXT: 1 4 0.25 uadalp z0.h, p0/m, z1.b
+# CHECK-NEXT: 1 4 0.25 uadalp z29.s, p0/m, z30.h
+# CHECK-NEXT: 1 4 0.25 uadalp z30.d, p7/m, z31.s
+# CHECK-NEXT: 1 2 0.25 uaddlb z0.h, z1.b, z2.b
+# CHECK-NEXT: 1 2 0.25 uaddlb z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 2 0.25 uaddlb z31.d, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.25 uaddlt z0.h, z1.b, z2.b
+# CHECK-NEXT: 1 2 0.25 uaddlt z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 2 0.25 uaddlt z31.d, z31.s, z31.s
+# CHECK-NEXT: 6 9 2.00 uaddv d0, p7, z31.b
+# CHECK-NEXT: 2 4 0.50 uaddv d0, p7, z31.d
+# CHECK-NEXT: 4 8 1.00 uaddv d0, p7, z31.h
+# CHECK-NEXT: 4 6 1.00 uaddv d0, p7, z31.s
+# CHECK-NEXT: 1 2 0.25 uaddwb z0.h, z1.h, z2.b
+# CHECK-NEXT: 1 2 0.25 uaddwb z29.s, z30.s, z31.h
+# CHECK-NEXT: 1 2 0.25 uaddwb z31.d, z31.d, z31.s
+# CHECK-NEXT: 1 2 0.25 uaddwt z0.h, z1.h, z2.b
+# CHECK-NEXT: 1 2 0.25 uaddwt z29.s, z30.s, z31.h
+# CHECK-NEXT: 1 2 0.25 uaddwt z31.d, z31.d, z31.s
+# CHECK-NEXT: 1 3 0.50 ucvtf z0.d, p0/m, z0.d
+# CHECK-NEXT: 1 3 0.50 ucvtf z0.d, p0/m, z0.s
+# CHECK-NEXT: 1 3 0.50 ucvtf z0.h, p0/m, z0.d
+# CHECK-NEXT: 4 6 2.00 ucvtf z0.h, p0/m, z0.h
+# CHECK-NEXT: 2 4 1.00 ucvtf z0.h, p0/m, z0.s
+# CHECK-NEXT: 1 3 0.50 ucvtf z0.s, p0/m, z0.d
+# CHECK-NEXT: 2 4 1.00 ucvtf z0.s, p0/m, z0.s
+# CHECK-NEXT: 1 20 20.00 udiv z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 12 11.00 udiv z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 20 20.00 udivr z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 12 11.00 udivr z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 3 0.50 udot z0.d, z1.h, z15.h[1]
+# CHECK-NEXT: 1 3 0.50 udot z0.d, z1.h, z31.h
+# CHECK-NEXT: 1 3 0.25 udot z0.s, z1.b, z31.b
+# CHECK-NEXT: 1 3 0.25 udot z0.s, z1.b, z7.b[3]
+# CHECK-NEXT: 1 2 0.25 uhadd z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 2 0.25 uhadd z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.25 uhadd z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.25 uhadd z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 2 0.25 uhsub z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 2 0.25 uhsub z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.25 uhsub z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.25 uhsub z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 2 0.25 uhsubr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 2 0.25 uhsubr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.25 uhsubr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.25 uhsubr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 2 0.25 umax z0.b, z0.b, #0
+# CHECK-NEXT: 1 2 0.25 umax z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: 1 2 0.25 umax z31.b, z31.b, #255
+# CHECK-NEXT: 1 2 0.25 umax z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: 1 2 0.25 umax z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: 1 2 0.25 umax z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.25 umaxp z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 2 0.25 umaxp z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.25 umaxp z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.25 umaxp z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 6 9 2.00 umaxv b0, p7, z31.b
+# CHECK-NEXT: 2 4 0.50 umaxv d0, p7, z31.d
+# CHECK-NEXT: 4 8 1.00 umaxv h0, p7, z31.h
+# CHECK-NEXT: 4 6 1.00 umaxv s0, p7, z31.s
+# CHECK-NEXT: 1 2 0.25 umin z0.b, z0.b, #0
+# CHECK-NEXT: 1 2 0.25 umin z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: 1 2 0.25 umin z31.b, z31.b, #255
+# CHECK-NEXT: 1 2 0.25 umin z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: 1 2 0.25 umin z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: 1 2 0.25 umin z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.25 uminp z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 2 0.25 uminp z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.25 uminp z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.25 uminp z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 6 9 2.00 uminv b0, p7, z31.b
+# CHECK-NEXT: 2 4 0.50 uminv d0, p7, z31.d
+# CHECK-NEXT: 4 8 1.00 uminv h0, p7, z31.h
+# CHECK-NEXT: 4 6 1.00 uminv s0, p7, z31.s
+# CHECK-NEXT: 1 4 0.50 umlalb z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: 1 4 0.50 umlalb z0.d, z1.s, z31.s
+# CHECK-NEXT: 1 4 0.50 umlalb z0.h, z1.b, z31.b
+# CHECK-NEXT: 1 4 0.50 umlalb z0.s, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.50 umlalb z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 0.50 umlalt z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: 1 4 0.50 umlalt z0.d, z1.s, z31.s
+# CHECK-NEXT: 1 4 0.50 umlalt z0.h, z1.b, z31.b
+# CHECK-NEXT: 1 4 0.50 umlalt z0.s, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.50 umlalt z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 0.50 umlslb z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: 1 4 0.50 umlslb z0.d, z1.s, z31.s
+# CHECK-NEXT: 1 4 0.50 umlslb z0.h, z1.b, z31.b
+# CHECK-NEXT: 1 4 0.50 umlslb z0.s, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.50 umlslb z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 0.50 umlslt z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: 1 4 0.50 umlslt z0.d, z1.s, z31.s
+# CHECK-NEXT: 1 4 0.50 umlslt z0.h, z1.b, z31.b
+# CHECK-NEXT: 1 4 0.50 umlslt z0.s, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.50 umlslt z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: 1 3 0.25 ummla z0.s, z1.b, z2.b
+# CHECK-NEXT: 1 4 0.50 umulh z0.b, p7/m, z0.b, z31.b
+# CHECK-NEXT: 1 4 0.50 umulh z0.b, z1.b, z2.b
+# CHECK-NEXT: 2 5 1.00 umulh z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 4 0.50 umulh z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 1 4 0.50 umulh z0.h, z1.h, z2.h
+# CHECK-NEXT: 1 4 0.50 umulh z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 4 0.50 umulh z29.s, z30.s, z31.s
+# CHECK-NEXT: 2 5 1.00 umulh z31.d, z31.d, z31.d
+# CHECK-NEXT: 1 4 0.50 umullb z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: 1 4 0.50 umullb z0.h, z1.b, z2.b
+# CHECK-NEXT: 1 4 0.50 umullb z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 0.50 umullb z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 4 0.50 umullb z31.d, z31.s, z31.s
+# CHECK-NEXT: 1 4 0.50 umullt z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: 1 4 0.50 umullt z0.h, z1.b, z2.b
+# CHECK-NEXT: 1 4 0.50 umullt z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 0.50 umullt z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 4 0.50 umullt z31.d, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.25 uqadd z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 2 0.25 uqadd z0.b, z0.b, #0
+# CHECK-NEXT: 1 2 0.25 uqadd z0.b, z0.b, z0.b
+# CHECK-NEXT: 1 2 0.25 uqadd z0.d, z0.d, #0
+# CHECK-NEXT: 1 2 0.25 uqadd z0.d, z0.d, #0, lsl #8
+# CHECK-NEXT: 1 2 0.25 uqadd z0.d, z0.d, z0.d
+# CHECK-NEXT: 1 2 0.25 uqadd z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.25 uqadd z0.h, z0.h, #0
+# CHECK-NEXT: 1 2 0.25 uqadd z0.h, z0.h, #0, lsl #8
+# CHECK-NEXT: 1 2 0.25 uqadd z0.h, z0.h, z0.h
+# CHECK-NEXT: 1 2 0.25 uqadd z0.s, z0.s, #0
+# CHECK-NEXT: 1 2 0.25 uqadd z0.s, z0.s, #0, lsl #8
+# CHECK-NEXT: 1 2 0.25 uqadd z0.s, z0.s, z0.s
+# CHECK-NEXT: 1 2 0.25 uqadd z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.25 uqadd z31.b, z31.b, #255
+# CHECK-NEXT: 1 2 0.25 uqadd z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 2 0.25 uqadd z31.d, z31.d, #65280
+# CHECK-NEXT: 1 2 0.25 uqadd z31.h, z31.h, #65280
+# CHECK-NEXT: 1 2 0.25 uqadd z31.s, z31.s, #65280
+# CHECK-NEXT: 1 2 0.50 uqdecb w0
+# CHECK-NEXT: 1 2 0.50 uqdecb w0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqdecb w0, pow2
+# CHECK-NEXT: 1 2 0.50 uqdecb w0, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 uqdecb x0
+# CHECK-NEXT: 1 2 0.50 uqdecb x0, #14
+# CHECK-NEXT: 1 2 0.50 uqdecb x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqdecb x0, pow2
+# CHECK-NEXT: 1 2 0.50 uqdecb x0, vl1
+# CHECK-NEXT: 1 2 0.50 uqdecd w0
+# CHECK-NEXT: 1 2 0.50 uqdecd w0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqdecd w0, pow2
+# CHECK-NEXT: 1 2 0.50 uqdecd w0, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 uqdecd x0
+# CHECK-NEXT: 1 2 0.50 uqdecd x0, #14
+# CHECK-NEXT: 1 2 0.50 uqdecd x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqdecd x0, pow2
+# CHECK-NEXT: 1 2 0.50 uqdecd x0, vl1
+# CHECK-NEXT: 1 2 0.25 uqdecd z0.d
+# CHECK-NEXT: 1 2 0.25 uqdecd z0.d, all, mul #16
+# CHECK-NEXT: 1 2 0.25 uqdecd z0.d, pow2
+# CHECK-NEXT: 1 2 0.25 uqdecd z0.d, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 uqdech w0
+# CHECK-NEXT: 1 2 0.50 uqdech w0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqdech w0, pow2
+# CHECK-NEXT: 1 2 0.50 uqdech w0, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 uqdech x0
+# CHECK-NEXT: 1 2 0.50 uqdech x0, #14
+# CHECK-NEXT: 1 2 0.50 uqdech x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqdech x0, pow2
+# CHECK-NEXT: 1 2 0.50 uqdech x0, vl1
+# CHECK-NEXT: 1 2 0.25 uqdech z0.h
+# CHECK-NEXT: 1 2 0.25 uqdech z0.h, all, mul #16
+# CHECK-NEXT: 1 2 0.25 uqdech z0.h, pow2
+# CHECK-NEXT: 1 2 0.25 uqdech z0.h, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 uqdecp wzr, p15.b
+# CHECK-NEXT: 1 2 0.50 uqdecp wzr, p15.d
+# CHECK-NEXT: 1 2 0.50 uqdecp wzr, p15.h
+# CHECK-NEXT: 1 2 0.50 uqdecp wzr, p15.s
+# CHECK-NEXT: 1 2 0.50 uqdecp x0, p0.b
+# CHECK-NEXT: 1 2 0.50 uqdecp x0, p0.d
+# CHECK-NEXT: 1 2 0.50 uqdecp x0, p0.h
+# CHECK-NEXT: 1 2 0.50 uqdecp x0, p0.s
+# CHECK-NEXT: 3 7 1.00 uqdecp z0.d, p0.d
+# CHECK-NEXT: 3 7 1.00 uqdecp z0.h, p0.h
+# CHECK-NEXT: 3 7 1.00 uqdecp z0.s, p0.s
+# CHECK-NEXT: 1 2 0.50 uqdecw w0
+# CHECK-NEXT: 1 2 0.50 uqdecw w0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqdecw w0, pow2
+# CHECK-NEXT: 1 2 0.50 uqdecw w0, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 uqdecw x0
+# CHECK-NEXT: 1 2 0.50 uqdecw x0, #14
+# CHECK-NEXT: 1 2 0.50 uqdecw x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqdecw x0, pow2
+# CHECK-NEXT: 1 2 0.50 uqdecw x0, vl1
+# CHECK-NEXT: 1 2 0.25 uqdecw z0.s
+# CHECK-NEXT: 1 2 0.25 uqdecw z0.s, all, mul #16
+# CHECK-NEXT: 1 2 0.25 uqdecw z0.s, pow2
+# CHECK-NEXT: 1 2 0.25 uqdecw z0.s, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 uqincb w0
+# CHECK-NEXT: 1 2 0.50 uqincb w0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqincb w0, pow2
+# CHECK-NEXT: 1 2 0.50 uqincb w0, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 uqincb x0
+# CHECK-NEXT: 1 2 0.50 uqincb x0, #14
+# CHECK-NEXT: 1 2 0.50 uqincb x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqincb x0, pow2
+# CHECK-NEXT: 1 2 0.50 uqincb x0, vl1
+# CHECK-NEXT: 1 2 0.50 uqincd w0
+# CHECK-NEXT: 1 2 0.50 uqincd w0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqincd w0, pow2
+# CHECK-NEXT: 1 2 0.50 uqincd w0, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 uqincd x0
+# CHECK-NEXT: 1 2 0.50 uqincd x0, #14
+# CHECK-NEXT: 1 2 0.50 uqincd x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqincd x0, pow2
+# CHECK-NEXT: 1 2 0.50 uqincd x0, vl1
+# CHECK-NEXT: 1 2 0.25 uqincd z0.d
+# CHECK-NEXT: 1 2 0.25 uqincd z0.d, all, mul #16
+# CHECK-NEXT: 1 2 0.25 uqincd z0.d, pow2
+# CHECK-NEXT: 1 2 0.25 uqincd z0.d, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 uqinch w0
+# CHECK-NEXT: 1 2 0.50 uqinch w0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqinch w0, pow2
+# CHECK-NEXT: 1 2 0.50 uqinch w0, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 uqinch x0
+# CHECK-NEXT: 1 2 0.50 uqinch x0, #14
+# CHECK-NEXT: 1 2 0.50 uqinch x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqinch x0, pow2
+# CHECK-NEXT: 1 2 0.50 uqinch x0, vl1
+# CHECK-NEXT: 1 2 0.25 uqinch z0.h
+# CHECK-NEXT: 1 2 0.25 uqinch z0.h, all, mul #16
+# CHECK-NEXT: 1 2 0.25 uqinch z0.h, pow2
+# CHECK-NEXT: 1 2 0.25 uqinch z0.h, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 uqincp wzr, p15.b
+# CHECK-NEXT: 1 2 0.50 uqincp wzr, p15.d
+# CHECK-NEXT: 1 2 0.50 uqincp wzr, p15.h
+# CHECK-NEXT: 1 2 0.50 uqincp wzr, p15.s
+# CHECK-NEXT: 1 2 0.50 uqincp x0, p0.b
+# CHECK-NEXT: 1 2 0.50 uqincp x0, p0.d
+# CHECK-NEXT: 1 2 0.50 uqincp x0, p0.h
+# CHECK-NEXT: 1 2 0.50 uqincp x0, p0.s
+# CHECK-NEXT: 3 7 1.00 uqincp z0.d, p0.d
+# CHECK-NEXT: 3 7 1.00 uqincp z0.h, p0.h
+# CHECK-NEXT: 3 7 1.00 uqincp z0.s, p0.s
+# CHECK-NEXT: 1 2 0.50 uqincw w0
+# CHECK-NEXT: 1 2 0.50 uqincw w0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqincw w0, pow2
+# CHECK-NEXT: 1 2 0.50 uqincw w0, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 uqincw x0
+# CHECK-NEXT: 1 2 0.50 uqincw x0, #14
+# CHECK-NEXT: 1 2 0.50 uqincw x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqincw x0, pow2
+# CHECK-NEXT: 1 2 0.50 uqincw x0, vl1
+# CHECK-NEXT: 1 2 0.25 uqincw z0.s
+# CHECK-NEXT: 1 2 0.25 uqincw z0.s, all, mul #16
+# CHECK-NEXT: 1 2 0.25 uqincw z0.s, pow2
+# CHECK-NEXT: 1 2 0.25 uqincw z0.s, pow2, mul #16
+# CHECK-NEXT: 1 4 0.25 uqrshl z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 4 0.25 uqrshl z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 4 0.25 uqrshl z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 4 0.25 uqrshl z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 4 0.25 uqrshlr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 4 0.25 uqrshlr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 4 0.25 uqrshlr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 4 0.25 uqrshlr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 4 0.25 uqrshrnb z0.b, z0.h, #1
+# CHECK-NEXT: 1 4 0.25 uqrshrnb z0.h, z0.s, #1
+# CHECK-NEXT: 1 4 0.25 uqrshrnb z0.s, z0.d, #1
+# CHECK-NEXT: 1 4 0.25 uqrshrnb z31.b, z31.h, #8
+# CHECK-NEXT: 1 4 0.25 uqrshrnb z31.h, z31.s, #16
+# CHECK-NEXT: 1 4 0.25 uqrshrnb z31.s, z31.d, #32
+# CHECK-NEXT: 1 4 0.25 uqrshrnt z0.b, z0.h, #1
+# CHECK-NEXT: 1 4 0.25 uqrshrnt z0.h, z0.s, #1
+# CHECK-NEXT: 1 4 0.25 uqrshrnt z0.s, z0.d, #1
+# CHECK-NEXT: 1 4 0.25 uqrshrnt z31.b, z31.h, #8
+# CHECK-NEXT: 1 4 0.25 uqrshrnt z31.h, z31.s, #16
+# CHECK-NEXT: 1 4 0.25 uqrshrnt z31.s, z31.d, #32
+# CHECK-NEXT: 1 4 0.25 uqshl z0.b, p0/m, z0.b, #0
+# CHECK-NEXT: 1 4 0.25 uqshl z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 4 0.25 uqshl z0.d, p0/m, z0.d, #0
+# CHECK-NEXT: 1 4 0.25 uqshl z0.h, p0/m, z0.h, #0
+# CHECK-NEXT: 1 4 0.25 uqshl z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 4 0.25 uqshl z0.s, p0/m, z0.s, #0
+# CHECK-NEXT: 1 4 0.25 uqshl z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 4 0.25 uqshl z31.b, p0/m, z31.b, #7
+# CHECK-NEXT: 1 4 0.25 uqshl z31.d, p0/m, z31.d, #63
+# CHECK-NEXT: 1 4 0.25 uqshl z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 4 0.25 uqshl z31.h, p0/m, z31.h, #15
+# CHECK-NEXT: 1 4 0.25 uqshl z31.s, p0/m, z31.s, #31
+# CHECK-NEXT: 1 4 0.25 uqshlr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 4 0.25 uqshlr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 4 0.25 uqshlr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 4 0.25 uqshlr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 4 0.25 uqshrnb z0.b, z0.h, #1
+# CHECK-NEXT: 1 4 0.25 uqshrnb z0.h, z0.s, #1
+# CHECK-NEXT: 1 4 0.25 uqshrnb z0.s, z0.d, #1
+# CHECK-NEXT: 1 4 0.25 uqshrnb z31.b, z31.h, #8
+# CHECK-NEXT: 1 4 0.25 uqshrnb z31.h, z31.s, #16
+# CHECK-NEXT: 1 4 0.25 uqshrnb z31.s, z31.d, #32
+# CHECK-NEXT: 1 4 0.25 uqshrnt z0.b, z0.h, #1
+# CHECK-NEXT: 1 4 0.25 uqshrnt z0.h, z0.s, #1
+# CHECK-NEXT: 1 4 0.25 uqshrnt z0.s, z0.d, #1
+# CHECK-NEXT: 1 4 0.25 uqshrnt z31.b, z31.h, #8
+# CHECK-NEXT: 1 4 0.25 uqshrnt z31.h, z31.s, #16
+# CHECK-NEXT: 1 4 0.25 uqshrnt z31.s, z31.d, #32
+# CHECK-NEXT: 1 2 0.25 uqsub z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 2 0.25 uqsub z0.b, z0.b, #0
+# CHECK-NEXT: 1 2 0.25 uqsub z0.b, z0.b, z0.b
+# CHECK-NEXT: 1 2 0.25 uqsub z0.d, z0.d, #0
+# CHECK-NEXT: 1 2 0.25 uqsub z0.d, z0.d, #0, lsl #8
+# CHECK-NEXT: 1 2 0.25 uqsub z0.d, z0.d, z0.d
+# CHECK-NEXT: 1 2 0.25 uqsub z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.25 uqsub z0.h, z0.h, #0
+# CHECK-NEXT: 1 2 0.25 uqsub z0.h, z0.h, #0, lsl #8
+# CHECK-NEXT: 1 2 0.25 uqsub z0.h, z0.h, z0.h
+# CHECK-NEXT: 1 2 0.25 uqsub z0.s, z0.s, #0
+# CHECK-NEXT: 1 2 0.25 uqsub z0.s, z0.s, #0, lsl #8
+# CHECK-NEXT: 1 2 0.25 uqsub z0.s, z0.s, z0.s
+# CHECK-NEXT: 1 2 0.25 uqsub z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.25 uqsub z31.b, z31.b, #255
+# CHECK-NEXT: 1 2 0.25 uqsub z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 2 0.25 uqsub z31.d, z31.d, #65280
+# CHECK-NEXT: 1 2 0.25 uqsub z31.h, z31.h, #65280
+# CHECK-NEXT: 1 2 0.25 uqsub z31.s, z31.s, #65280
+# CHECK-NEXT: 1 2 0.25 uqsubr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 2 0.25 uqsubr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.25 uqsubr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.25 uqsubr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 4 0.25 uqxtnb z0.b, z31.h
+# CHECK-NEXT: 1 4 0.25 uqxtnb z0.h, z31.s
+# CHECK-NEXT: 1 4 0.25 uqxtnb z0.s, z31.d
+# CHECK-NEXT: 1 4 0.25 uqxtnt z0.b, z31.h
+# CHECK-NEXT: 1 4 0.25 uqxtnt z0.h, z31.s
+# CHECK-NEXT: 1 4 0.25 uqxtnt z0.s, z31.d
+# CHECK-NEXT: 2 4 1.00 urecpe z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 2 0.25 urhadd z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 2 0.25 urhadd z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.25 urhadd z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.25 urhadd z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 4 0.25 urshl z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 4 0.25 urshl z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 4 0.25 urshl z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 4 0.25 urshl z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 4 0.25 urshlr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 4 0.25 urshlr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 4 0.25 urshlr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 4 0.25 urshlr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 4 0.25 urshr z0.b, p0/m, z0.b, #1
+# CHECK-NEXT: 1 4 0.25 urshr z0.d, p0/m, z0.d, #1
+# CHECK-NEXT: 1 4 0.25 urshr z0.h, p0/m, z0.h, #1
+# CHECK-NEXT: 1 4 0.25 urshr z0.s, p0/m, z0.s, #1
+# CHECK-NEXT: 1 4 0.25 urshr z31.b, p0/m, z31.b, #8
+# CHECK-NEXT: 1 4 0.25 urshr z31.d, p0/m, z31.d, #64
+# CHECK-NEXT: 1 4 0.25 urshr z31.h, p0/m, z31.h, #16
+# CHECK-NEXT: 1 4 0.25 urshr z31.s, p0/m, z31.s, #32
+# CHECK-NEXT: 2 4 1.00 ursqrte z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 4 0.50 ursra z0.b, z0.b, #1
+# CHECK-NEXT: 1 4 0.50 ursra z0.d, z0.d, #1
+# CHECK-NEXT: 1 4 0.50 ursra z0.h, z0.h, #1
+# CHECK-NEXT: 1 4 0.50 ursra z0.s, z0.s, #1
+# CHECK-NEXT: 1 4 0.50 ursra z31.b, z31.b, #8
+# CHECK-NEXT: 1 4 0.50 ursra z31.d, z31.d, #64
+# CHECK-NEXT: 1 4 0.50 ursra z31.h, z31.h, #16
+# CHECK-NEXT: 1 4 0.50 ursra z31.s, z31.s, #32
+# CHECK-NEXT: 1 2 0.25 ushllb z0.d, z0.s, #0
+# CHECK-NEXT: 1 2 0.25 ushllb z0.h, z0.b, #0
+# CHECK-NEXT: 1 2 0.25 ushllb z0.s, z0.h, #0
+# CHECK-NEXT: 1 2 0.25 ushllb z31.d, z31.s, #31
+# CHECK-NEXT: 1 2 0.25 ushllb z31.h, z31.b, #7
+# CHECK-NEXT: 1 2 0.25 ushllb z31.s, z31.h, #15
+# CHECK-NEXT: 1 2 0.25 ushllt z0.d, z0.s, #0
+# CHECK-NEXT: 1 2 0.25 ushllt z0.h, z0.b, #0
+# CHECK-NEXT: 1 2 0.25 ushllt z0.s, z0.h, #0
+# CHECK-NEXT: 1 2 0.25 ushllt z31.d, z31.s, #31
+# CHECK-NEXT: 1 2 0.25 ushllt z31.h, z31.b, #7
+# CHECK-NEXT: 1 2 0.25 ushllt z31.s, z31.h, #15
+# CHECK-NEXT: 1 3 0.25 usmmla z0.s, z1.b, z2.b
+# CHECK-NEXT: 1 2 0.25 usqadd z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 2 0.25 usqadd z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.25 usqadd z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.25 usqadd z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 4 0.50 usra z0.b, z0.b, #1
+# CHECK-NEXT: 1 4 0.50 usra z0.d, z0.d, #1
+# CHECK-NEXT: 1 4 0.50 usra z0.h, z0.h, #1
+# CHECK-NEXT: 1 4 0.50 usra z0.s, z0.s, #1
+# CHECK-NEXT: 1 4 0.50 usra z31.b, z31.b, #8
+# CHECK-NEXT: 1 4 0.50 usra z31.d, z31.d, #64
+# CHECK-NEXT: 1 4 0.50 usra z31.h, z31.h, #16
+# CHECK-NEXT: 1 4 0.50 usra z31.s, z31.s, #32
+# CHECK-NEXT: 1 2 0.25 usublb z0.h, z1.b, z2.b
+# CHECK-NEXT: 1 2 0.25 usublb z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 2 0.25 usublb z31.d, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.25 usublt z0.h, z1.b, z2.b
+# CHECK-NEXT: 1 2 0.25 usublt z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 2 0.25 usublt z31.d, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.25 usubwb z0.h, z1.h, z2.b
+# CHECK-NEXT: 1 2 0.25 usubwb z29.s, z30.s, z31.h
+# CHECK-NEXT: 1 2 0.25 usubwb z31.d, z31.d, z31.s
+# CHECK-NEXT: 1 2 0.25 usubwt z0.h, z1.h, z2.b
+# CHECK-NEXT: 1 2 0.25 usubwt z29.s, z30.s, z31.h
+# CHECK-NEXT: 1 2 0.25 usubwt z31.d, z31.d, z31.s
+# CHECK-NEXT: 1 2 0.25 uunpkhi z31.d, z31.s
+# CHECK-NEXT: 1 2 0.25 uunpkhi z31.h, z31.b
+# CHECK-NEXT: 1 2 0.25 uunpkhi z31.s, z31.h
+# CHECK-NEXT: 1 2 0.25 uunpklo z31.d, z31.s
+# CHECK-NEXT: 1 2 0.25 uunpklo z31.h, z31.b
+# CHECK-NEXT: 1 2 0.25 uunpklo z31.s, z31.h
+# CHECK-NEXT: 1 2 0.25 uxtb z0.d, p0/m, z0.d
+# CHECK-NEXT: 1 2 0.25 uxtb z0.h, p0/m, z0.h
+# CHECK-NEXT: 1 2 0.25 uxtb z0.s, p0/m, z0.s
+# CHECK-NEXT: 1 2 0.25 uxtb z31.d, p7/m, z31.d
+# CHECK-NEXT: 1 2 0.25 uxtb z31.h, p7/m, z31.h
+# CHECK-NEXT: 1 2 0.25 uxtb z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 2 0.25 uxth z0.d, p0/m, z0.d
+# CHECK-NEXT: 1 2 0.25 uxth z0.s, p0/m, z0.s
+# CHECK-NEXT: 1 2 0.25 uxth z31.d, p7/m, z31.d
+# CHECK-NEXT: 1 2 0.25 uxth z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 2 0.25 uxtw z0.d, p0/m, z0.d
+# CHECK-NEXT: 1 2 0.25 uxtw z31.d, p7/m, z31.d
+# CHECK-NEXT: 1 2 0.50 uzp1 p15.b, p15.b, p15.b
+# CHECK-NEXT: 1 2 0.50 uzp1 p15.d, p15.d, p15.d
+# CHECK-NEXT: 1 2 0.50 uzp1 p15.h, p15.h, p15.h
+# CHECK-NEXT: 1 2 0.50 uzp1 p15.s, p15.s, p15.s
+# CHECK-NEXT: 1 2 0.25 uzp1 z31.b, z31.b, z31.b
+# CHECK-NEXT: 1 2 0.25 uzp1 z31.d, z31.d, z31.d
+# CHECK-NEXT: 1 2 0.25 uzp1 z31.h, z31.h, z31.h
+# CHECK-NEXT: 1 2 0.25 uzp1 z31.s, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.50 uzp2 p15.b, p15.b, p15.b
+# CHECK-NEXT: 1 2 0.50 uzp2 p15.d, p15.d, p15.d
+# CHECK-NEXT: 1 2 0.50 uzp2 p15.h, p15.h, p15.h
+# CHECK-NEXT: 1 2 0.50 uzp2 p15.s, p15.s, p15.s
+# CHECK-NEXT: 1 2 0.25 uzp2 z31.b, z31.b, z31.b
+# CHECK-NEXT: 1 2 0.25 uzp2 z31.d, z31.d, z31.d
+# CHECK-NEXT: 1 2 0.25 uzp2 z31.h, z31.h, z31.h
+# CHECK-NEXT: 1 2 0.25 uzp2 z31.s, z31.s, z31.s
+# CHECK-NEXT: 2 3 1.00 whilege p15.b, w0, wzr
+# CHECK-NEXT: 2 3 1.00 whilege p15.b, wzr, w0
+# CHECK-NEXT: 2 3 1.00 whilege p15.b, x0, xzr
+# CHECK-NEXT: 2 3 1.00 whilege p15.b, xzr, x0
+# CHECK-NEXT: 2 3 1.00 whilege p15.d, w0, wzr
+# CHECK-NEXT: 2 3 1.00 whilege p15.d, x0, xzr
+# CHECK-NEXT: 2 3 1.00 whilege p15.h, w0, wzr
+# CHECK-NEXT: 2 3 1.00 whilege p15.h, x0, xzr
+# CHECK-NEXT: 2 3 1.00 whilege p15.s, w0, wzr
+# CHECK-NEXT: 2 3 1.00 whilege p15.s, x0, xzr
+# CHECK-NEXT: 2 3 1.00 whilerw p15.b, x30, x30
+# CHECK-NEXT: 2 3 1.00 whilerw p15.d, x30, x30
+# CHECK-NEXT: 2 3 1.00 whilerw p15.h, x30, x30
+# CHECK-NEXT: 2 3 1.00 whilerw p15.s, x30, x30
+# CHECK-NEXT: 2 3 1.00 whilewr p15.b, x30, x30
+# CHECK-NEXT: 2 3 1.00 whilewr p15.d, x30, x30
+# CHECK-NEXT: 2 3 1.00 whilewr p15.h, x30, x30
+# CHECK-NEXT: 2 3 1.00 whilewr p15.s, x30, x30
+# CHECK-NEXT: 1 2 1.00 * U wrffr p0.b
+# CHECK-NEXT: 1 2 1.00 * U wrffr p15.b
+# CHECK-NEXT: 1 2 0.25 xar z0.b, z0.b, z1.b, #1
+# CHECK-NEXT: 1 2 0.25 xar z0.d, z0.d, z1.d, #1
+# CHECK-NEXT: 1 2 0.25 xar z0.h, z0.h, z1.h, #1
+# CHECK-NEXT: 1 2 0.25 xar z0.s, z0.s, z1.s, #1
+# CHECK-NEXT: 1 2 0.25 xar z31.b, z31.b, z30.b, #8
+# CHECK-NEXT: 1 2 0.25 xar z31.d, z31.d, z30.d, #64
+# CHECK-NEXT: 1 2 0.25 xar z31.h, z31.h, z30.h, #16
+# CHECK-NEXT: 1 2 0.25 xar z31.s, z31.s, z30.s, #32
+# CHECK-NEXT: 1 2 0.50 zip1 p0.b, p0.b, p0.b
+# CHECK-NEXT: 1 2 0.50 zip1 p0.d, p0.d, p0.d
+# CHECK-NEXT: 1 2 0.50 zip1 p0.h, p0.h, p0.h
+# CHECK-NEXT: 1 2 0.50 zip1 p0.s, p0.s, p0.s
+# CHECK-NEXT: 1 2 0.50 zip1 p15.b, p15.b, p15.b
+# CHECK-NEXT: 1 2 0.50 zip1 p15.d, p15.d, p15.d
+# CHECK-NEXT: 1 2 0.50 zip1 p15.h, p15.h, p15.h
+# CHECK-NEXT: 1 2 0.50 zip1 p15.s, p15.s, p15.s
+# CHECK-NEXT: 1 2 0.25 zip1 z0.b, z0.b, z0.b
+# CHECK-NEXT: 1 2 0.25 zip1 z0.d, z0.d, z0.d
+# CHECK-NEXT: 1 2 0.25 zip1 z0.h, z0.h, z0.h
+# CHECK-NEXT: 1 2 0.25 zip1 z0.s, z0.s, z0.s
+# CHECK-NEXT: 1 2 0.25 zip1 z31.b, z31.b, z31.b
+# CHECK-NEXT: 1 2 0.25 zip1 z31.d, z31.d, z31.d
+# CHECK-NEXT: 1 2 0.25 zip1 z31.h, z31.h, z31.h
+# CHECK-NEXT: 1 2 0.25 zip1 z31.s, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.50 zip2 p0.b, p0.b, p0.b
+# CHECK-NEXT: 1 2 0.50 zip2 p0.d, p0.d, p0.d
+# CHECK-NEXT: 1 2 0.50 zip2 p0.h, p0.h, p0.h
+# CHECK-NEXT: 1 2 0.50 zip2 p0.s, p0.s, p0.s
+# CHECK-NEXT: 1 2 0.50 zip2 p15.b, p15.b, p15.b
+# CHECK-NEXT: 1 2 0.50 zip2 p15.d, p15.d, p15.d
+# CHECK-NEXT: 1 2 0.50 zip2 p15.h, p15.h, p15.h
+# CHECK-NEXT: 1 2 0.50 zip2 p15.s, p15.s, p15.s
+# CHECK-NEXT: 1 2 0.25 zip2 z0.b, z0.b, z0.b
+# CHECK-NEXT: 1 2 0.25 zip2 z0.d, z0.d, z0.d
+# CHECK-NEXT: 1 2 0.25 zip2 z0.h, z0.h, z0.h
+# CHECK-NEXT: 1 2 0.25 zip2 z0.s, z0.s, z0.s
+# CHECK-NEXT: 1 2 0.25 zip2 z31.b, z31.b, z31.b
+# CHECK-NEXT: 1 2 0.25 zip2 z31.d, z31.d, z31.d
+# CHECK-NEXT: 1 2 0.25 zip2 z31.h, z31.h, z31.h
+# CHECK-NEXT: 1 2 0.25 zip2 z31.s, z31.s, z31.s
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - V3UnitB
+# CHECK-NEXT: [0.1] - V3UnitB
+# CHECK-NEXT: [0.2] - V3UnitB
+# CHECK-NEXT: [1.0] - V3UnitD
+# CHECK-NEXT: [1.1] - V3UnitD
+# CHECK-NEXT: [2.0] - V3UnitFlg
+# CHECK-NEXT: [2.1] - V3UnitFlg
+# CHECK-NEXT: [2.2] - V3UnitFlg
+# CHECK-NEXT: [2.3] - V3UnitFlg
+# CHECK-NEXT: [3.0] - V3UnitL12
+# CHECK-NEXT: [3.1] - V3UnitL12
+# CHECK-NEXT: [4] - V3UnitLS0
+# CHECK-NEXT: [5] - V3UnitM0
+# CHECK-NEXT: [6] - V3UnitM1
+# CHECK-NEXT: [7] - V3UnitS0
+# CHECK-NEXT: [8] - V3UnitS1
+# CHECK-NEXT: [9] - V3UnitS2
+# CHECK-NEXT: [10] - V3UnitS3
+# CHECK-NEXT: [11] - V3UnitS4
+# CHECK-NEXT: [12] - V3UnitS5
+# CHECK-NEXT: [13] - V3UnitST1
+# CHECK-NEXT: [14] - V3UnitV0
+# CHECK-NEXT: [15] - V3UnitV1
+# CHECK-NEXT: [16] - V3UnitV2
+# CHECK-NEXT: [17] - V3UnitV3
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] [17]
+# CHECK-NEXT: - - - - - - - - - 245.00 245.00 696.50 400.88 303.88 41.88 41.88 41.88 41.88 41.88 41.88 451.50 1606.00 1268.50 817.00 681.50
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] [17] Instructions:
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 abs z0.b, p0/m, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 abs z0.d, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 abs z0.h, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 abs z0.s, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 abs z31.b, p7/m, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 abs z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 abs z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 abs z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 adclb z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 adclb z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 adclt z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 adclt z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z0.b, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z0.b, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z0.d, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z0.d, z0.d, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z0.d, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z0.h, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z0.h, z0.h, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z0.h, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z0.s, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z0.s, z0.s, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z0.s, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z0.s, z1.s, z2.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z21.b, p5/m, z21.b, z10.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z21.b, z10.b, z21.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z21.d, p5/m, z21.d, z10.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z21.d, z10.d, z21.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z21.h, p5/m, z21.h, z10.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z21.h, z10.h, z21.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z21.s, p5/m, z21.s, z10.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z21.s, z10.s, z21.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z23.b, p3/m, z23.b, z13.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z23.b, z13.b, z8.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z23.d, p3/m, z23.d, z13.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z23.d, z13.d, z8.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z23.h, p3/m, z23.h, z13.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z23.h, z13.h, z8.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z23.s, p3/m, z23.s, z13.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z23.s, z13.s, z8.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z31.b, z31.b, #255
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z31.b, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z31.d, z31.d, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z31.d, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z31.h, z31.h, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z31.h, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z31.s, z31.s, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 add z31.s, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 addhnb z0.b, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 addhnb z0.h, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 addhnb z0.s, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 addhnt z0.b, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 addhnt z0.h, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 addhnt z0.s, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 addp z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 addp z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 addp z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 addp z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - addpl sp, sp, #31
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - addpl x0, x0, #-32
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - addpl x21, x21, #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - addpl x23, x8, #-1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - addvl sp, sp, #31
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - addvl x0, x0, #-32
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - addvl x21, x21, #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - addvl x23, x8, #-1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 adr z0.d, [z0.d, z0.d, lsl #1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 adr z0.d, [z0.d, z0.d, lsl #2]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 adr z0.d, [z0.d, z0.d, lsl #3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 adr z0.d, [z0.d, z0.d, sxtw #1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 adr z0.d, [z0.d, z0.d, sxtw #2]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 adr z0.d, [z0.d, z0.d, sxtw #3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 adr z0.d, [z0.d, z0.d, sxtw]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 adr z0.d, [z0.d, z0.d, uxtw #1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 adr z0.d, [z0.d, z0.d, uxtw #2]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 adr z0.d, [z0.d, z0.d, uxtw #3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 adr z0.d, [z0.d, z0.d, uxtw]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 adr z0.d, [z0.d, z0.d]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 adr z0.s, [z0.s, z0.s, lsl #1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 adr z0.s, [z0.s, z0.s, lsl #2]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 adr z0.s, [z0.s, z0.s, lsl #3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 adr z0.s, [z0.s, z0.s]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 aesd z0.b, z0.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 aese z0.b, z0.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 aesimc z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 aesimc z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 aesmc z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 aesmc z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - and p0.b, p0/z, p0.b, p1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 and z0.d, z0.d, #0x6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 and z0.d, z0.d, #0xfffffffffffffff9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 and z0.d, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 and z0.s, z0.s, #0x6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 and z0.s, z0.s, #0xfffffff9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 and z23.d, z13.d, z8.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 and z23.h, z23.h, #0x6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 and z23.h, z23.h, #0xfff9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 and z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 and z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 and z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 and z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 and z5.b, z5.b, #0x6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 and z5.b, z5.b, #0xf9
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ands p0.b, p0/z, p0.b, p1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.75 0.25 0.75 andv b0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.75 0.25 0.75 andv d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.75 0.25 0.75 andv h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.75 0.25 0.75 andv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 asr z0.b, p0/m, z0.b, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 asr z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 asr z0.b, p0/m, z0.b, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 asr z0.b, z0.b, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 asr z0.b, z1.b, z2.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 asr z0.d, p0/m, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 asr z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 asr z0.d, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 asr z0.h, p0/m, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 asr z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 asr z0.h, p0/m, z0.h, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 asr z0.h, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 asr z0.h, z1.h, z2.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 asr z0.s, p0/m, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 asr z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 asr z0.s, p0/m, z0.s, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 asr z0.s, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 asr z0.s, z1.s, z2.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 asr z31.b, p0/m, z31.b, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 asr z31.b, z31.b, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 asr z31.d, p0/m, z31.d, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 asr z31.d, z31.d, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 asr z31.h, p0/m, z31.h, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 asr z31.h, z31.h, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 asr z31.s, p0/m, z31.s, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 asr z31.s, z31.s, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 asrd z0.b, p0/m, z0.b, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 asrd z0.d, p0/m, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 asrd z0.h, p0/m, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 asrd z0.s, p0/m, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 asrd z31.b, p0/m, z31.b, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 asrd z31.d, p0/m, z31.d, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 asrd z31.h, p0/m, z31.h, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 asrd z31.s, p0/m, z31.s, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 asrr z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 asrr z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 asrr z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 asrr z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 bcax z29.d, z29.d, z30.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 - - bdep z0.b, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 - - bdep z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 - - bdep z0.h, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 - - bdep z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 - - bext z0.b, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 - - bext z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 - - bext z0.h, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 - - bext z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - bfcvt z0.h, p0/m, z1.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - bfcvtnt z0.h, p0/m, z1.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 bfdot z0.s, z1.h, z2.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 bfdot z0.s, z1.h, z2.h[0]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 bfdot z0.s, z1.h, z2.h[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 bfmlalb z0.s, z1.h, z2.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 bfmlalb z0.s, z1.h, z2.h[0]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 bfmlalb z0.s, z1.h, z2.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 bfmlalb z10.s, z21.h, z14.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 bfmlalb z21.s, z14.h, z3.h[2]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 bfmlalt z0.s, z1.h, z2.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 bfmlalt z0.s, z1.h, z2.h[0]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 bfmlalt z0.s, z1.h, z2.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 bfmlalt z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 bfmlalt z14.s, z10.h, z21.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 bfmmla z0.s, z1.h, z2.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 - - bgrp z0.b, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 - - bgrp z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 - - bgrp z0.h, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 - - bgrp z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - bic p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - bic p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 bic z0.d, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 bic z23.d, z13.d, z8.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 bic z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 bic z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 bic z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 bic z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - bics p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - bics p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - brka p0.b, p15/m, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - brka p0.b, p15/z, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - brkas p0.b, p15/z, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - brkb p0.b, p15/m, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - brkb p0.b, p15/z, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - brkbs p0.b, p15/z, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - brkn p0.b, p15/z, p1.b, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - brkn p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - brkns p0.b, p15/z, p1.b, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - brkns p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - brkpa p0.b, p15/z, p1.b, p2.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - brkpa p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - brkpas p0.b, p15/z, p1.b, p2.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - brkpas p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - brkpb p0.b, p15/z, p1.b, p2.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - brkpb p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - brkpbs p0.b, p15/z, p1.b, p2.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - brkpbs p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 bsl z0.d, z0.d, z1.d, z2.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 bsl1n z0.d, z0.d, z1.d, z2.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 bsl2n z0.d, z0.d, z1.d, z2.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cadd z0.b, z0.b, z0.b, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cadd z0.d, z0.d, z0.d, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cadd z0.h, z0.h, z0.h, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cadd z0.s, z0.s, z0.s, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cadd z31.b, z31.b, z31.b, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cadd z31.d, z31.d, z31.d, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cadd z31.h, z31.h, z31.h, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cadd z31.s, z31.s, z31.s, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - cdot z0.d, z1.h, z15.h[1], #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - cdot z0.d, z1.h, z31.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - cdot z0.d, z1.h, z31.h, #180
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - cdot z0.d, z1.h, z31.h, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - cdot z0.d, z1.h, z31.h, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cdot z0.s, z1.b, z31.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cdot z0.s, z1.b, z7.b[3], #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - cdot z29.d, z30.h, z0.h[0], #180
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - cdot z31.d, z30.h, z7.h[1], #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - cdot z5.d, z6.h, z3.h[0], #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - - clasta b0, p7, b0, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - - clasta d0, p7, d0, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - - clasta h0, p7, h0, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - - clasta s0, p7, s0, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 0.50 - - clasta w0, p7, w0, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 0.50 - - clasta w0, p7, w0, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 0.50 - - clasta w0, p7, w0, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 0.50 - - clasta x0, p7, x0, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - - clasta z0.b, p7, z0.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - - clasta z0.d, p7, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - - clasta z0.h, p7, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - - clasta z0.s, p7, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - - clastb b0, p7, b0, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - - clastb d0, p7, d0, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - - clastb h0, p7, h0, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - - clastb s0, p7, s0, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 0.50 - - clastb w0, p7, w0, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 0.50 - - clastb w0, p7, w0, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 0.50 - - clastb w0, p7, w0, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 0.50 - - clastb x0, p7, x0, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - - clastb z0.b, p7, z0.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - - clastb z0.d, p7, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - - clastb z0.h, p7, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - - clastb z0.s, p7, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cls z31.b, p7/m, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cls z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cls z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cls z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 clz z31.b, p7/m, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 clz z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 clz z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 clz z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - cmla z0.b, z1.b, z2.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - cmla z0.d, z1.d, z2.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - cmla z0.h, z1.h, z2.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - cmla z0.h, z1.h, z2.h[0], #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - cmla z0.s, z1.s, z2.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - cmla z0.s, z1.s, z2.s[0], #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - cmla z15.b, z16.b, z17.b, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - cmla z15.d, z16.d, z17.d, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - cmla z15.h, z16.h, z17.h, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - cmla z15.s, z16.s, z17.s, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - cmla z29.b, z30.b, z31.b, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - cmla z29.d, z30.d, z31.d, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - cmla z29.h, z30.h, z31.h, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - cmla z29.s, z30.s, z31.s, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - cmla z31.b, z31.b, z31.b, #180
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - cmla z31.d, z31.d, z31.d, #180
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - cmla z31.h, z30.h, z7.h[0], #180
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - cmla z31.h, z31.h, z31.h, #180
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - cmla z31.s, z30.s, z7.s[0], #180
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - cmla z31.s, z31.s, z31.s, #180
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpeq p0.b, p0/z, z0.b, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpeq p0.b, p0/z, z0.b, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpeq p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpeq p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpeq p0.d, p0/z, z0.d, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpeq p0.d, p0/z, z0.d, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpeq p0.d, p0/z, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpeq p0.h, p0/z, z0.h, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpeq p0.h, p0/z, z0.h, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpeq p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpeq p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpeq p0.s, p0/z, z0.s, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpeq p0.s, p0/z, z0.s, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpeq p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpeq p0.s, p0/z, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpge p0.b, p0/z, z0.b, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpge p0.b, p0/z, z0.b, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpge p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpge p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpge p0.b, p0/z, z1.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpge p0.d, p0/z, z0.d, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpge p0.d, p0/z, z0.d, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpge p0.d, p0/z, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpge p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpge p0.h, p0/z, z0.h, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpge p0.h, p0/z, z0.h, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpge p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpge p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpge p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpge p0.s, p0/z, z0.s, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpge p0.s, p0/z, z0.s, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpge p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpge p0.s, p0/z, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpge p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpgt p0.b, p0/z, z0.b, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpgt p0.b, p0/z, z0.b, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpgt p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpgt p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpgt p0.b, p0/z, z1.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpgt p0.d, p0/z, z0.d, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpgt p0.d, p0/z, z0.d, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpgt p0.d, p0/z, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpgt p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpgt p0.h, p0/z, z0.h, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpgt p0.h, p0/z, z0.h, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpgt p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpgt p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpgt p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpgt p0.s, p0/z, z0.s, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpgt p0.s, p0/z, z0.s, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpgt p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpgt p0.s, p0/z, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpgt p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmphi p0.b, p0/z, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmphi p0.b, p0/z, z0.b, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmphi p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmphi p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmphi p0.b, p0/z, z1.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmphi p0.d, p0/z, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmphi p0.d, p0/z, z0.d, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmphi p0.d, p0/z, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmphi p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmphi p0.h, p0/z, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmphi p0.h, p0/z, z0.h, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmphi p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmphi p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmphi p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmphi p0.s, p0/z, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmphi p0.s, p0/z, z0.s, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmphi p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmphi p0.s, p0/z, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmphi p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmphs p0.b, p0/z, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmphs p0.b, p0/z, z0.b, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmphs p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmphs p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmphs p0.b, p0/z, z1.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmphs p0.d, p0/z, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmphs p0.d, p0/z, z0.d, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmphs p0.d, p0/z, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmphs p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmphs p0.h, p0/z, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmphs p0.h, p0/z, z0.h, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmphs p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmphs p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmphs p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmphs p0.s, p0/z, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmphs p0.s, p0/z, z0.s, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmphs p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmphs p0.s, p0/z, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmphs p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmple p0.b, p0/z, z0.b, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmple p0.b, p0/z, z0.b, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmple p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmple p0.d, p0/z, z0.d, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmple p0.d, p0/z, z0.d, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmple p0.h, p0/z, z0.h, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmple p0.h, p0/z, z0.h, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmple p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmple p0.s, p0/z, z0.s, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmple p0.s, p0/z, z0.s, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmple p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmplo p0.b, p0/z, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmplo p0.b, p0/z, z0.b, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmplo p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmplo p0.d, p0/z, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmplo p0.d, p0/z, z0.d, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmplo p0.h, p0/z, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmplo p0.h, p0/z, z0.h, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmplo p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmplo p0.s, p0/z, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmplo p0.s, p0/z, z0.s, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmplo p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpls p0.b, p0/z, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpls p0.b, p0/z, z0.b, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpls p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpls p0.d, p0/z, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpls p0.d, p0/z, z0.d, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpls p0.h, p0/z, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpls p0.h, p0/z, z0.h, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpls p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpls p0.s, p0/z, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpls p0.s, p0/z, z0.s, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpls p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmplt p0.b, p0/z, z0.b, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmplt p0.b, p0/z, z0.b, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmplt p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmplt p0.d, p0/z, z0.d, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmplt p0.d, p0/z, z0.d, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmplt p0.h, p0/z, z0.h, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmplt p0.h, p0/z, z0.h, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmplt p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmplt p0.s, p0/z, z0.s, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmplt p0.s, p0/z, z0.s, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmplt p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpne p0.b, p0/z, z0.b, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpne p0.b, p0/z, z0.b, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpne p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpne p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpne p0.d, p0/z, z0.d, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpne p0.d, p0/z, z0.d, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpne p0.d, p0/z, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpne p0.h, p0/z, z0.h, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpne p0.h, p0/z, z0.h, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpne p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpne p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpne p0.s, p0/z, z0.s, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpne p0.s, p0/z, z0.s, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpne p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - cmpne p0.s, p0/z, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cnot z31.b, p7/m, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cnot z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cnot z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cnot z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cnt z31.b, p7/m, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cnt z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cnt z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 cnt z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - cntb x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - cntb x0, #28
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - cntb x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - cntb x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - cntd x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - cntd x0, #28
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - cntd x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - cntd x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - cnth x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - cnth x0, #28
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - cnth x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - cnth x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - cntp x0, p15, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - cntp x0, p15, p0.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - cntp x0, p15, p0.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - cntp x0, p15, p0.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - cntw x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - cntw x0, #28
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - cntw x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - cntw x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - - compact z31.d, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - - compact z31.s, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - ctermeq w30, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - ctermeq wzr, w30
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - ctermeq x30, xzr
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - ctermeq xzr, x30
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - ctermne w30, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - ctermne wzr, w30
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - ctermne x30, xzr
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - ctermne xzr, x30
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - decb x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - decb x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - decb x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - decb x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - decb x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - decd x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - decd x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - decd x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - decd x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - decd x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - dech x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - dech x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - dech x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - dech x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - dech x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - decp x0, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - decp x0, p0.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - decp x0, p0.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - decp x0, p0.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - decp xzr, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - decp xzr, p15.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - decp xzr, p15.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - decp xzr, p15.s
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - 0.25 0.25 0.25 0.25 decp z31.d, p15.d
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - 0.25 0.25 0.25 0.25 decp z31.h, p15.h
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - 0.25 0.25 0.25 0.25 decp z31.s, p15.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - decw x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - decw x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - decw x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - decw x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - decw x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 dupm z0.d, #0xfffffffffffffff9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 dupm z0.s, #0xfffffff9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 dupm z23.h, #0xfff9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 dupm z5.b, #0xf9
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - eor p0.b, p0/z, p0.b, p1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 eor z0.d, z0.d, #0x6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 eor z0.d, z0.d, #0xfffffffffffffff9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 eor z0.d, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 eor z0.s, z0.s, #0x6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 eor z0.s, z0.s, #0xfffffff9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 eor z23.d, z13.d, z8.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 eor z23.h, z23.h, #0x6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 eor z23.h, z23.h, #0xfff9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 eor z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 eor z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 eor z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 eor z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 eor z5.b, z5.b, #0x6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 eor z5.b, z5.b, #0xf9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 eor3 z29.d, z29.d, z30.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 eorbt z0.b, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 eorbt z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 eorbt z0.h, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 eorbt z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - eors p0.b, p0/z, p0.b, p1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 eortb z0.b, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 eortb z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 eortb z0.h, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 eortb z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.75 0.25 0.75 eorv b0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.75 0.25 0.75 eorv d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.75 0.25 0.75 eorv h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.75 0.25 0.75 eorv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ext z0.b, { z1.b, z2.b }, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ext z31.b, z31.b, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ext z31.b, z31.b, z0.b, #255
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ext z31.b, { z30.b, z31.b }, #255
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fabd z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fabd z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fabd z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fabs z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fabs z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fabs z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - facge p0.d, p0/z, z0.d, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - facge p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - facge p0.h, p0/z, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - facge p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - facge p0.s, p0/z, z0.s, z1.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - facge p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - facgt p0.d, p0/z, z0.d, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - facgt p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - facgt p0.h, p0/z, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - facgt p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - facgt p0.s, p0/z, z0.s, z1.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - facgt p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fadd z0.d, p0/m, z0.d, #0.5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fadd z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fadd z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fadd z0.h, p0/m, z0.h, #0.5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fadd z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fadd z0.h, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fadd z0.s, p0/m, z0.s, #0.5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fadd z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fadd z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fadd z31.d, p7/m, z31.d, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fadd z31.h, p7/m, z31.h, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fadd z31.s, p7/m, z31.s, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fadda d0, p7, d0, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 9.00 - - fadda h0, p7, h0, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 5.00 - - fadda s0, p7, s0, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 faddp z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 faddp z29.s, p3/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 faddp z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 faddv d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 1.00 1.00 faddv h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.75 0.75 0.75 0.75 faddv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcadd z0.d, p0/m, z0.d, z0.d, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcadd z0.h, p0/m, z0.h, z0.h, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcadd z0.s, p0/m, z0.s, z0.s, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcadd z31.d, p7/m, z31.d, z31.d, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcadd z31.h, p7/m, z31.h, z31.h, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcadd z31.s, p7/m, z31.s, z31.s, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmeq p0.d, p0/z, z0.d, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmeq p0.d, p0/z, z0.d, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmeq p0.h, p0/z, z0.h, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmeq p0.h, p0/z, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmeq p0.s, p0/z, z0.s, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmeq p0.s, p0/z, z0.s, z1.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmge p0.d, p0/z, z0.d, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmge p0.d, p0/z, z0.d, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmge p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmge p0.h, p0/z, z0.h, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmge p0.h, p0/z, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmge p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmge p0.s, p0/z, z0.s, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmge p0.s, p0/z, z0.s, z1.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmge p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmgt p0.d, p0/z, z0.d, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmgt p0.d, p0/z, z0.d, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmgt p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmgt p0.h, p0/z, z0.h, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmgt p0.h, p0/z, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmgt p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmgt p0.s, p0/z, z0.s, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmgt p0.s, p0/z, z0.s, z1.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmgt p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmla z0.d, p0/m, z0.d, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmla z0.d, p0/m, z1.d, z2.d, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmla z0.h, p0/m, z0.h, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmla z0.h, p0/m, z1.h, z2.h, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmla z0.h, z0.h, z0.h[0], #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmla z0.s, p0/m, z0.s, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmla z0.s, p0/m, z1.s, z2.s, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmla z21.s, z10.s, z5.s[1], #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmla z23.s, z13.s, z8.s[0], #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmla z29.d, p7/m, z30.d, z31.d, #180
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmla z29.h, p7/m, z30.h, z31.h, #180
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmla z29.s, p7/m, z30.s, z31.s, #180
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmla z31.d, p7/m, z31.d, z31.d, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmla z31.h, p7/m, z31.h, z31.h, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmla z31.h, z31.h, z7.h[3], #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmla z31.s, p7/m, z31.s, z31.s, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmle p0.d, p0/z, z0.d, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmle p0.h, p0/z, z0.h, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmle p0.s, p0/z, z0.s, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmlt p0.d, p0/z, z0.d, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmlt p0.h, p0/z, z0.h, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmlt p0.s, p0/z, z0.s, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmne p0.d, p0/z, z0.d, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmne p0.d, p0/z, z0.d, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmne p0.h, p0/z, z0.h, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmne p0.h, p0/z, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmne p0.s, p0/z, z0.s, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmne p0.s, p0/z, z0.s, z1.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmuo p0.d, p0/z, z0.d, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmuo p0.h, p0/z, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - fcmuo p0.s, p0/z, z0.s, z1.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvt z0.d, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvt z0.d, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvt z0.h, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvt z0.h, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvt z0.s, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvt z0.s, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtlt z0.s, p0/m, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtlt z30.d, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtnt z0.h, p0/m, z1.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtnt z30.s, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtx z0.s, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtx z30.s, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtxnt z0.s, p0/m, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtxnt z30.s, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzs z0.d, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzs z0.d, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzs z0.d, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - fcvtzs z0.h, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzs z0.s, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtzs z0.s, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtzs z0.s, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu z0.d, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu z0.d, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu z0.d, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - fcvtzu z0.h, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu z0.s, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtzu z0.s, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - fcvtzu z0.s, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 - - fdiv z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 8.00 - - fdiv z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 4.00 - - fdiv z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 - - fdivr z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 8.00 - - fdivr z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 4.00 - - fdivr z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - - fexpa z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - - fexpa z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - - fexpa z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - flogb z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - flogb z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - flogb z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmad z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmad z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmad z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmax z0.d, p0/m, z0.d, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmax z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmax z0.h, p0/m, z0.h, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmax z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmax z0.s, p0/m, z0.s, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmax z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmax z31.d, p7/m, z31.d, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmax z31.h, p7/m, z31.h, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmax z31.s, p7/m, z31.s, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmaxnm z0.d, p0/m, z0.d, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmaxnm z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmaxnm z0.h, p0/m, z0.h, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmaxnm z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmaxnm z0.s, p0/m, z0.s, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmaxnm z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmaxnm z31.d, p7/m, z31.d, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmaxnm z31.h, p7/m, z31.h, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmaxnm z31.s, p7/m, z31.s, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmaxnmp z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmaxnmp z29.s, p3/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmaxnmp z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmaxnmv d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 1.00 1.00 fmaxnmv h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.75 0.75 0.75 0.75 fmaxnmv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmaxp z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmaxp z29.s, p3/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmaxp z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fmaxv d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 1.00 1.00 fmaxv h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.75 0.75 0.75 0.75 fmaxv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmin z0.d, p0/m, z0.d, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmin z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmin z0.h, p0/m, z0.h, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmin z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmin z0.s, p0/m, z0.s, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmin z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmin z31.d, p7/m, z31.d, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmin z31.h, p7/m, z31.h, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmin z31.s, p7/m, z31.s, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fminnm z0.d, p0/m, z0.d, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fminnm z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fminnm z0.h, p0/m, z0.h, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fminnm z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fminnm z0.s, p0/m, z0.s, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fminnm z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fminnm z31.d, p7/m, z31.d, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fminnm z31.h, p7/m, z31.h, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fminnm z31.s, p7/m, z31.s, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fminnmp z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fminnmp z29.s, p3/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fminnmp z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fminnmv d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 1.00 1.00 fminnmv h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.75 0.75 0.75 0.75 fminnmv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fminp z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fminp z29.s, p3/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fminp z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 fminv d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 1.00 1.00 fminv h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.75 0.75 0.75 0.75 fminv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmla z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmla z0.d, z1.d, z7.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmla z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmla z0.h, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmla z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmla z0.s, z1.s, z7.s[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmlalb z0.s, z1.h, z7.h[0]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmlalb z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmlalb z30.s, z31.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmlalt z0.s, z1.h, z7.h[0]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmlalt z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmlalt z30.s, z31.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmls z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmls z0.d, z1.d, z7.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmls z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmls z0.h, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmls z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmls z0.s, z1.s, z7.s[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmlslb z0.s, z1.h, z7.h[0]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmlslb z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmlslb z30.s, z31.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmlslt z0.s, z1.h, z7.h[0]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmlslt z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmlslt z30.s, z31.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmov z0.d, #-10.00000000
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmov z0.d, #0.12500000
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmov z0.d, p0/m, #-10.00000000
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmov z0.d, p0/m, #0.12500000
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmov z0.h, #-0.12500000
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmov z0.h, p0/m, #-0.12500000
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmov z0.s, #-0.12500000
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmov z0.s, p0/m, #-0.12500000
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmsb z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmsb z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmsb z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmul z0.d, p0/m, z0.d, #0.5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmul z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmul z0.d, z0.d, z0.d[0]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmul z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmul z0.h, p0/m, z0.h, #0.5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmul z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmul z0.h, z0.h, z0.h[0]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmul z0.h, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmul z0.s, p0/m, z0.s, #0.5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmul z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmul z0.s, z0.s, z0.s[0]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmul z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmul z31.d, p7/m, z31.d, #2.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmul z31.d, z31.d, z15.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmul z31.h, p7/m, z31.h, #2.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmul z31.h, z31.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmul z31.s, p7/m, z31.s, #2.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmul z31.s, z31.s, z7.s[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmulx z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmulx z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fmulx z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fneg z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fneg z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fneg z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fnmad z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fnmad z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fnmad z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fnmla z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fnmla z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fnmla z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fnmls z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fnmls z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fnmls z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fnmsb z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fnmsb z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fnmsb z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frecpe z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - frecpe z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - frecpe z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 frecps z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 frecps z0.h, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 frecps z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frecpx z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - frecpx z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - frecpx z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frinta z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - frinta z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - frinta z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frinti z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - frinti z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - frinti z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frintm z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - frintm z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - frintm z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frintn z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - frintn z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - frintn z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frintp z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - frintp z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - frintp z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frintx z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - frintx z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - frintx z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frintz z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - frintz z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - frintz z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - frsqrte z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - frsqrte z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - frsqrte z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 frsqrts z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 frsqrts z0.h, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 frsqrts z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fscale z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fscale z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fscale z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 - - fsqrt z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 8.00 - - fsqrt z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 4.00 - - fsqrt z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fsub z0.d, p0/m, z0.d, #0.5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fsub z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fsub z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fsub z0.h, p0/m, z0.h, #0.5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fsub z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fsub z0.h, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fsub z0.s, p0/m, z0.s, #0.5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fsub z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fsub z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fsub z31.d, p7/m, z31.d, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fsub z31.h, p7/m, z31.h, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fsub z31.s, p7/m, z31.s, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fsubr z0.d, p0/m, z0.d, #0.5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fsubr z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fsubr z0.h, p0/m, z0.h, #0.5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fsubr z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fsubr z0.s, p0/m, z0.s, #0.5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fsubr z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fsubr z31.d, p7/m, z31.d, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fsubr z31.h, p7/m, z31.h, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fsubr z31.s, p7/m, z31.s, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ftmad z0.d, z0.d, z31.d, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ftmad z0.h, z0.h, z31.h, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ftmad z0.s, z0.s, z31.s, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ftsmul z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ftsmul z0.h, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ftsmul z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ftssel z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ftssel z0.h, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ftssel z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 histcnt z0.s, p0/z, z1.s, z2.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 histcnt z29.d, p7/z, z30.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 histseg z0.b, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - incb x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - incb x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - incb x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - incb x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - incb x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - incd x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - incd x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - incd x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - incd x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - incd x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 incd z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 incd z0.d, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - inch x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - inch x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - inch x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - inch x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - inch x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 inch z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 inch z0.h, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - incp x0, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - incp x0, p0.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - incp x0, p0.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - incp x0, p0.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - incp xzr, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - incp xzr, p15.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - incp xzr, p15.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - incp xzr, p15.s
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - 0.25 0.25 0.25 0.25 incp z31.d, p15.d
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - 0.25 0.25 0.25 0.25 incp z31.h, p15.h
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - 0.25 0.25 0.25 0.25 incp z31.s, p15.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - incw x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - incw x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - incw x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - incw x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - incw x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 incw z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 incw z0.s, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - index z0.b, #0, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - index z0.d, #0, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - index z0.h, #0, #0
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 - 0.50 - index z0.h, w0, w0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - index z0.s, #0, #0
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 - 0.50 - index z21.b, w10, w21
+# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - - - - - - 1.00 - 1.00 - index z21.d, x10, x21
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 - 0.50 - index z21.s, w10, w21
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 - 0.50 - index z23.b, #13, w8
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 - 0.50 - index z23.b, w13, #8
+# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - - - - - - 1.00 - 1.00 - index z23.d, #13, x8
+# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - - - - - - 1.00 - 1.00 - index z23.d, x13, #8
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 - 0.50 - index z23.h, #13, w8
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 - 0.50 - index z23.h, w13, #8
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 - 0.50 - index z23.s, #13, w8
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 - 0.50 - index z23.s, w13, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - index z31.b, #-1, #-1
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 - 0.50 - index z31.b, #-1, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 - 0.50 - index z31.b, wzr, #-1
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 - 0.50 - index z31.b, wzr, wzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - index z31.d, #-1, #-1
+# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - - - - - - 1.00 - 1.00 - index z31.d, #-1, xzr
+# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - - - - - - 1.00 - 1.00 - index z31.d, xzr, #-1
+# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - - - - - - 1.00 - 1.00 - index z31.d, xzr, xzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - index z31.h, #-1, #-1
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 - 0.50 - index z31.h, #-1, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 - 0.50 - index z31.h, wzr, #-1
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 - 0.50 - index z31.h, wzr, wzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - index z31.s, #-1, #-1
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 - 0.50 - index z31.s, #-1, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 - 0.50 - index z31.s, wzr, #-1
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 - 0.50 - index z31.s, wzr, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - 1.00 - - insr z0.b, w0
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - 1.00 - - insr z0.d, x0
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - 1.00 - - insr z0.h, w0
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - 1.00 - - insr z0.s, w0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 insr z31.b, b31
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - 1.00 - - insr z31.b, wzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 insr z31.d, d31
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - 1.00 - - insr z31.d, xzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 insr z31.h, h31
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - 1.00 - - insr z31.h, wzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 insr z31.s, s31
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - 1.00 - - insr z31.s, wzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - - lasta b0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - - lasta d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - - lasta h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - - lasta s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - 1.00 - - lasta w0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - 1.00 - - lasta w0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - 1.00 - - lasta w0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - 1.00 - - lasta x0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - - lastb b0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - - lastb d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - - lastb h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - - lastb s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - 1.00 - - lastb w0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - 1.00 - - lastb w0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - 1.00 - - lastb w0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - 1.00 - - lastb x0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1b { z0.b }, p0/z, [sp, x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1b { z0.b }, p0/z, [x0, x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1b { z0.b }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1b { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1b { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1b { z0.h }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1b { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1b { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1b { z0.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1b { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1b { z21.b }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1b { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ld1b { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ld1b { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1b { z21.h }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1b { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1b { z21.s }, p5/z, [x10, x21]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1b { z23.d }, p3/z, [x13, x8]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1b { z31.b }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1b { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ld1b { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1b { z31.d }, p7/z, [z31.d, #31]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1b { z31.h }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1b { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1b { z31.s }, p7/z, [z31.s, #31]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1b { z5.h }, p3/z, [x17, x16]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1d { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1d { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1d { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ld1d { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ld1d { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1d { z23.d }, p3/z, [sp, x8, lsl #3]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1d { z23.d }, p3/z, [x13, x8, lsl #3]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1d { z23.d }, p3/z, [x13, z8.d, lsl #3]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1d { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ld1d { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1d { z31.d }, p7/z, [z31.d, #248]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1h { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1h { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1h { z0.h }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1h { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1h { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1h { z0.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1h { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1h { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ld1h { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ld1h { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1h { z21.h }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1h { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1h { z21.s }, p5/z, [x10, x21, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1h { z23.d }, p3/z, [x13, x8, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1h { z23.d }, p3/z, [x13, z8.d, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1h { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ld1h { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1h { z31.d }, p7/z, [z31.d, #62]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1h { z31.h }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1h { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 2.00 2.00 ld1h { z31.s }, p7/z, [sp, z31.s, sxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 2.00 2.00 ld1h { z31.s }, p7/z, [sp, z31.s, uxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1h { z31.s }, p7/z, [z31.s, #62]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1h { z5.h }, p3/z, [sp, x16, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1h { z5.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rb { z0.b }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rb { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rb { z0.h }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rb { z0.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rb { z31.b }, p7/z, [sp, #63]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rb { z31.d }, p7/z, [sp, #63]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rb { z31.h }, p7/z, [sp, #63]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rb { z31.s }, p7/z, [sp, #63]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rd { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rd { z31.d }, p7/z, [sp, #504]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rh { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rh { z0.h }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rh { z0.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rh { z31.d }, p7/z, [sp, #126]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rh { z31.h }, p7/z, [sp, #126]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rh { z31.s }, p7/z, [sp, #126]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rqb { z0.b }, p0/z, [x0, x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rqb { z0.b }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rqb { z21.b }, p5/z, [x10, #112]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rqb { z23.b }, p3/z, [x13, #-128]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rqb { z31.b }, p7/z, [sp, #-16]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rqd { z0.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rqd { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rqd { z23.d }, p3/z, [x13, #-128]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rqd { z23.d }, p3/z, [x13, #112]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rqd { z31.d }, p7/z, [sp, #-16]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rqh { z0.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rqh { z0.h }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rqh { z23.h }, p3/z, [x13, #-128]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rqh { z23.h }, p3/z, [x13, #112]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rqh { z31.h }, p7/z, [sp, #-16]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rqw { z0.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rqw { z0.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rqw { z23.s }, p3/z, [x13, #-128]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rqw { z23.s }, p3/z, [x13, #112]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rqw { z31.s }, p7/z, [sp, #-16]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rsb { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rsb { z0.h }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rsb { z0.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rsb { z31.d }, p7/z, [sp, #63]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rsb { z31.h }, p7/z, [sp, #63]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rsb { z31.s }, p7/z, [sp, #63]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rsh { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rsh { z0.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rsh { z31.d }, p7/z, [sp, #126]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rsh { z31.s }, p7/z, [sp, #126]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rsw { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rsw { z31.d }, p7/z, [sp, #252]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rw { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rw { z0.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rw { z31.d }, p7/z, [sp, #252]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1rw { z31.s }, p7/z, [sp, #252]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1sb { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1sb { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1sb { z0.h }, p0/z, [sp, x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1sb { z0.h }, p0/z, [x0, x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1sb { z0.h }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1sb { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1sb { z0.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1sb { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1sb { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ld1sb { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ld1sb { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1sb { z21.h }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1sb { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1sb { z21.s }, p5/z, [x10, x21]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1sb { z23.d }, p3/z, [x13, x8]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1sb { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ld1sb { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1sb { z31.d }, p7/z, [z31.d, #31]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1sb { z31.h }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1sb { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1sb { z31.s }, p7/z, [z31.s, #31]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1sh { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1sh { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1sh { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1sh { z0.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1sh { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1sh { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ld1sh { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ld1sh { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1sh { z21.s }, p5/z, [sp, x21, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1sh { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1sh { z21.s }, p5/z, [x10, x21, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1sh { z23.d }, p3/z, [x13, x8, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1sh { z23.d }, p3/z, [x13, z8.d, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1sh { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ld1sh { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1sh { z31.d }, p7/z, [z31.d, #62]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1sh { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 2.00 2.00 ld1sh { z31.s }, p7/z, [sp, z31.s, sxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 2.00 2.00 ld1sh { z31.s }, p7/z, [sp, z31.s, uxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1sh { z31.s }, p7/z, [z31.s, #62]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1sw { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1sw { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1sw { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ld1sw { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ld1sw { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1sw { z23.d }, p3/z, [sp, x8, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1sw { z23.d }, p3/z, [x13, x8, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1sw { z23.d }, p3/z, [x13, z8.d, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1sw { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ld1sw { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1sw { z31.d }, p7/z, [z31.d, #124]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1w { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1w { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1w { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1w { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1w { z0.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1w { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1w { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ld1w { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ld1w { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1w { z21.s }, p5/z, [sp, x21, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1w { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1w { z21.s }, p5/z, [x10, x21, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1w { z23.d }, p3/z, [x13, x8, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1w { z23.d }, p3/z, [x13, z8.d, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1w { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ld1w { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1w { z31.d }, p7/z, [z31.d, #124]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ld1w { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 2.00 2.00 ld1w { z31.s }, p7/z, [sp, z31.s, sxtw #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 2.00 2.00 ld1w { z31.s }, p7/z, [sp, z31.s, uxtw #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ld1w { z31.s }, p7/z, [z31.s, #124]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - 0.50 0.50 0.50 0.50 ld2b { z0.b, z1.b }, p0/z, [x0, x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 0.50 0.50 0.50 0.50 ld2b { z0.b, z1.b }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 0.50 0.50 0.50 0.50 ld2b { z21.b, z22.b }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 0.50 0.50 0.50 0.50 ld2b { z23.b, z24.b }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - 0.50 0.50 0.50 0.50 ld2b { z5.b, z6.b }, p3/z, [x17, x16]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - 0.50 0.50 0.50 0.50 ld2d { z0.d, z1.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 0.50 0.50 0.50 0.50 ld2d { z0.d, z1.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 0.50 0.50 0.50 0.50 ld2d { z21.d, z22.d }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 0.50 0.50 0.50 0.50 ld2d { z23.d, z24.d }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - 0.50 0.50 0.50 0.50 ld2d { z5.d, z6.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - 0.50 0.50 0.50 0.50 ld2h { z0.h, z1.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 0.50 0.50 0.50 0.50 ld2h { z0.h, z1.h }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 0.50 0.50 0.50 0.50 ld2h { z21.h, z22.h }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 0.50 0.50 0.50 0.50 ld2h { z23.h, z24.h }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - 0.50 0.50 0.50 0.50 ld2h { z5.h, z6.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - 0.50 0.50 0.50 0.50 ld2w { z0.s, z1.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 0.50 0.50 0.50 0.50 ld2w { z0.s, z1.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 0.50 0.50 0.50 0.50 ld2w { z21.s, z22.s }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 0.50 0.50 0.50 0.50 ld2w { z23.s, z24.s }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - 0.50 0.50 0.50 0.50 ld2w { z5.s, z6.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.38 0.38 0.38 0.38 0.38 0.38 0.38 0.38 - 0.75 0.75 0.75 0.75 ld3b { z0.b - z2.b }, p0/z, [x0, x0]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 - - - - - - - - - 0.75 0.75 0.75 0.75 ld3b { z0.b - z2.b }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 - - - - - - - - - 0.75 0.75 0.75 0.75 ld3b { z21.b - z23.b }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 - - - - - - - - - 0.75 0.75 0.75 0.75 ld3b { z23.b - z25.b }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.38 0.38 0.38 0.38 0.38 0.38 0.38 0.38 - 0.75 0.75 0.75 0.75 ld3b { z5.b - z7.b }, p3/z, [x17, x16]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.38 0.38 0.38 0.38 0.38 0.38 0.38 0.38 - 0.75 0.75 0.75 0.75 ld3d { z0.d - z2.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 - - - - - - - - - 0.75 0.75 0.75 0.75 ld3d { z0.d - z2.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 - - - - - - - - - 0.75 0.75 0.75 0.75 ld3d { z21.d - z23.d }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 - - - - - - - - - 0.75 0.75 0.75 0.75 ld3d { z23.d - z25.d }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.38 0.38 0.38 0.38 0.38 0.38 0.38 0.38 - 0.75 0.75 0.75 0.75 ld3d { z5.d - z7.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.38 0.38 0.38 0.38 0.38 0.38 0.38 0.38 - 0.75 0.75 0.75 0.75 ld3h { z0.h - z2.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 - - - - - - - - - 0.75 0.75 0.75 0.75 ld3h { z0.h - z2.h }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 - - - - - - - - - 0.75 0.75 0.75 0.75 ld3h { z21.h - z23.h }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 - - - - - - - - - 0.75 0.75 0.75 0.75 ld3h { z23.h - z25.h }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.38 0.38 0.38 0.38 0.38 0.38 0.38 0.38 - 0.75 0.75 0.75 0.75 ld3h { z5.h - z7.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.38 0.38 0.38 0.38 0.38 0.38 0.38 0.38 - 0.75 0.75 0.75 0.75 ld3w { z0.s - z2.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 - - - - - - - - - 0.75 0.75 0.75 0.75 ld3w { z0.s - z2.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 - - - - - - - - - 0.75 0.75 0.75 0.75 ld3w { z21.s - z23.s }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 - - - - - - - - - 0.75 0.75 0.75 0.75 ld3w { z23.s - z25.s }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.38 0.38 0.38 0.38 0.38 0.38 0.38 0.38 - 0.75 0.75 0.75 0.75 ld3w { z5.s - z7.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 - 2.00 2.00 2.00 2.00 ld4b { z0.b - z3.b }, p0/z, [x0, x0]
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 - - - - - - - - - 2.00 2.00 2.00 2.00 ld4b { z0.b - z3.b }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 - - - - - - - - - 2.00 2.00 2.00 2.00 ld4b { z21.b - z24.b }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 - - - - - - - - - 2.00 2.00 2.00 2.00 ld4b { z23.b - z26.b }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 - 2.00 2.00 2.00 2.00 ld4b { z5.b - z8.b }, p3/z, [x17, x16]
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 - 2.00 2.00 2.00 2.00 ld4d { z0.d - z3.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 - - - - - - - - - 2.00 2.00 2.00 2.00 ld4d { z0.d - z3.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 - - - - - - - - - 2.00 2.00 2.00 2.00 ld4d { z21.d - z24.d }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 - - - - - - - - - 2.00 2.00 2.00 2.00 ld4d { z23.d - z26.d }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 - 2.00 2.00 2.00 2.00 ld4d { z5.d - z8.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 - 2.00 2.00 2.00 2.00 ld4h { z0.h - z3.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 - - - - - - - - - 2.00 2.00 2.00 2.00 ld4h { z0.h - z3.h }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 - - - - - - - - - 2.00 2.00 2.00 2.00 ld4h { z21.h - z24.h }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 - - - - - - - - - 2.00 2.00 2.00 2.00 ld4h { z23.h - z26.h }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 - 2.00 2.00 2.00 2.00 ld4h { z5.h - z8.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 - 2.00 2.00 2.00 2.00 ld4w { z0.s - z3.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 - - - - - - - - - 2.00 2.00 2.00 2.00 ld4w { z0.s - z3.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 - - - - - - - - - 2.00 2.00 2.00 2.00 ld4w { z21.s - z24.s }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 - - - - - - - - - 2.00 2.00 2.00 2.00 ld4w { z23.s - z26.s }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 - 2.00 2.00 2.00 2.00 ld4w { z5.s - z8.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldff1b { z0.d }, p0/z, [x0, x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1b { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldff1b { z0.h }, p0/z, [x0, x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldff1b { z0.s }, p0/z, [x0, x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1b { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1b { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1b { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ldff1b { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ldff1b { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldff1b { z31.b }, p7/z, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ldff1b { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldff1b { z31.d }, p7/z, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1b { z31.d }, p7/z, [z31.d, #31]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldff1b { z31.h }, p7/z, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldff1b { z31.s }, p7/z, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1b { z31.s }, p7/z, [z31.s, #31]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldff1d { z0.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1d { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ldff1d { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ldff1d { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1d { z23.d }, p3/z, [x13, z8.d, lsl #3]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ldff1d { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldff1d { z31.d }, p7/z, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1d { z31.d }, p7/z, [z31.d, #248]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldff1h { z0.d }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1h { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldff1h { z0.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldff1h { z0.s }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1h { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1h { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1h { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ldff1h { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ldff1h { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1h { z23.d }, p3/z, [x13, z8.d, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ldff1h { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldff1h { z31.d }, p7/z, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1h { z31.d }, p7/z, [z31.d, #62]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldff1h { z31.h }, p7/z, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 2.00 2.00 ldff1h { z31.s }, p7/z, [sp, z31.s, sxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 2.00 2.00 ldff1h { z31.s }, p7/z, [sp, z31.s, uxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldff1h { z31.s }, p7/z, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1h { z31.s }, p7/z, [z31.s, #62]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldff1sb { z0.d }, p0/z, [x0, x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1sb { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldff1sb { z0.h }, p0/z, [x0, x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldff1sb { z0.s }, p0/z, [x0, x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1sb { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1sb { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1sb { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ldff1sb { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ldff1sb { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ldff1sb { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldff1sb { z31.d }, p7/z, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1sb { z31.d }, p7/z, [z31.d, #31]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldff1sb { z31.h }, p7/z, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldff1sb { z31.s }, p7/z, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1sb { z31.s }, p7/z, [z31.s, #31]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldff1sh { z0.d }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1sh { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldff1sh { z0.s }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1sh { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1sh { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ldff1sh { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ldff1sh { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1sh { z23.d }, p3/z, [x13, z8.d, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ldff1sh { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldff1sh { z31.d }, p7/z, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1sh { z31.d }, p7/z, [z31.d, #62]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 2.00 2.00 ldff1sh { z31.s }, p7/z, [sp, z31.s, sxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 2.00 2.00 ldff1sh { z31.s }, p7/z, [sp, z31.s, uxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldff1sh { z31.s }, p7/z, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1sh { z31.s }, p7/z, [z31.s, #62]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldff1sw { z0.d }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1sw { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ldff1sw { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ldff1sw { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1sw { z23.d }, p3/z, [x13, z8.d, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ldff1sw { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldff1sw { z31.d }, p7/z, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1sw { z31.d }, p7/z, [z31.d, #124]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldff1w { z0.d }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1w { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldff1w { z0.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1w { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1w { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1w { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ldff1w { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ldff1w { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1w { z23.d }, p3/z, [x13, z8.d, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 0.50 0.50 ldff1w { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldff1w { z31.d }, p7/z, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1w { z31.d }, p7/z, [z31.d, #124]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 2.00 2.00 ldff1w { z31.s }, p7/z, [sp, z31.s, sxtw #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 2.00 2.00 ldff1w { z31.s }, p7/z, [sp, z31.s, uxtw #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - - - ldff1w { z31.s }, p7/z, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 1.00 1.00 ldff1w { z31.s }, p7/z, [z31.s, #124]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1b { z0.b }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1b { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1b { z0.h }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1b { z0.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1b { z21.b }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1b { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1b { z21.h }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1b { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1b { z31.b }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1b { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1b { z31.h }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1b { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1d { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1d { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1d { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1h { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1h { z0.h }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1h { z0.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1h { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1h { z21.h }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1h { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1h { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1h { z31.h }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1h { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1sb { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1sb { z0.h }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1sb { z0.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1sb { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1sb { z21.h }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1sb { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1sb { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1sb { z31.h }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1sb { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1sh { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1sh { z0.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1sh { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1sh { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1sh { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1sh { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1sw { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1sw { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1sw { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1w { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1w { z0.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1w { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1w { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1w { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnf1w { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnt1b { z0.b }, p0/z, [x0, x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnt1b { z0.b }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 0.50 0.50 0.50 0.50 ldnt1b { z0.d }, p0/z, [z1.d]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 1.00 1.00 ldnt1b { z0.s }, p0/z, [z1.s]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnt1b { z21.b }, p5/z, [x10, #7, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnt1b { z23.b }, p3/z, [x13, #-8, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 0.50 0.50 0.50 0.50 ldnt1b { z31.d }, p7/z, [z31.d, x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 0.50 0.50 0.50 0.50 ldnt1b { z31.d }, p7/z, [z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 1.00 1.00 ldnt1b { z31.s }, p7/z, [z31.s, x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 1.00 1.00 ldnt1b { z31.s }, p7/z, [z31.s]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnt1d { z0.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnt1d { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 0.50 0.50 0.50 0.50 ldnt1d { z0.d }, p0/z, [z1.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnt1d { z21.d }, p5/z, [x10, #7, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnt1d { z23.d }, p3/z, [x13, #-8, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 0.50 0.50 0.50 0.50 ldnt1d { z31.d }, p7/z, [z31.d, x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 0.50 0.50 0.50 0.50 ldnt1d { z31.d }, p7/z, [z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 0.50 0.50 0.50 0.50 ldnt1h { z0.d }, p0/z, [z1.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnt1h { z0.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnt1h { z0.h }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 1.00 1.00 ldnt1h { z0.s }, p0/z, [z1.s]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnt1h { z21.h }, p5/z, [x10, #7, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnt1h { z23.h }, p3/z, [x13, #-8, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 0.50 0.50 0.50 0.50 ldnt1h { z31.d }, p7/z, [z31.d, x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 0.50 0.50 0.50 0.50 ldnt1h { z31.d }, p7/z, [z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 1.00 1.00 ldnt1h { z31.s }, p7/z, [z31.s, x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 1.00 1.00 ldnt1h { z31.s }, p7/z, [z31.s]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 0.50 0.50 0.50 0.50 ldnt1sb { z0.d }, p0/z, [z1.d]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 1.00 1.00 ldnt1sb { z0.s }, p0/z, [z1.s]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 0.50 0.50 0.50 0.50 ldnt1sb { z31.d }, p7/z, [z31.d, x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 0.50 0.50 0.50 0.50 ldnt1sb { z31.d }, p7/z, [z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 1.00 1.00 ldnt1sb { z31.s }, p7/z, [z31.s, x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 1.00 1.00 ldnt1sb { z31.s }, p7/z, [z31.s]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 0.50 0.50 0.50 0.50 ldnt1sh { z0.d }, p0/z, [z1.d]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 1.00 1.00 ldnt1sh { z0.s }, p0/z, [z1.s]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 0.50 0.50 0.50 0.50 ldnt1sh { z31.d }, p7/z, [z31.d, x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 0.50 0.50 0.50 0.50 ldnt1sh { z31.d }, p7/z, [z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 1.00 1.00 ldnt1sh { z31.s }, p7/z, [z31.s, x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 1.00 1.00 ldnt1sh { z31.s }, p7/z, [z31.s]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 0.50 0.50 0.50 0.50 ldnt1sw { z0.d }, p0/z, [z1.d]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 0.50 0.50 0.50 0.50 ldnt1sw { z31.d }, p7/z, [z31.d, x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 0.50 0.50 0.50 0.50 ldnt1sw { z31.d }, p7/z, [z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 0.50 0.50 0.50 0.50 ldnt1w { z0.d }, p0/z, [z1.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnt1w { z0.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnt1w { z0.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 1.00 1.00 ldnt1w { z0.s }, p0/z, [z1.s]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnt1w { z21.s }, p5/z, [x10, #7, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldnt1w { z23.s }, p3/z, [x13, #-8, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 0.50 0.50 0.50 0.50 ldnt1w { z31.d }, p7/z, [z31.d, x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 0.50 0.50 0.50 0.50 ldnt1w { z31.d }, p7/z, [z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 1.00 1.00 ldnt1w { z31.s }, p7/z, [z31.s, x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 1.00 1.00 ldnt1w { z31.s }, p7/z, [z31.s]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.50 0.50 - - - - - - - - - - - ldr p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.50 0.50 - - - - - - - - - - - ldr p5, [x10, #255, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.50 0.50 - - - - - - - - - - - ldr p7, [x13, #-256, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldr z0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldr z23, [x13, #255, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - ldr z31, [sp, #-256, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsl z0.b, p0/m, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsl z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsl z0.b, p0/m, z0.b, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsl z0.b, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsl z0.b, z1.b, z2.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsl z0.d, p0/m, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsl z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsl z0.d, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsl z0.h, p0/m, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsl z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsl z0.h, p0/m, z0.h, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsl z0.h, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsl z0.h, z1.h, z2.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsl z0.s, p0/m, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsl z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsl z0.s, p0/m, z0.s, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsl z0.s, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsl z0.s, z1.s, z2.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsl z31.b, p0/m, z31.b, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsl z31.b, z31.b, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsl z31.d, p0/m, z31.d, #63
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsl z31.d, z31.d, #63
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsl z31.h, p0/m, z31.h, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsl z31.h, z31.h, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsl z31.s, p0/m, z31.s, #31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsl z31.s, z31.s, #31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lslr z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lslr z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lslr z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lslr z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsr z0.b, p0/m, z0.b, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsr z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsr z0.b, p0/m, z0.b, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsr z0.b, z0.b, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsr z0.b, z1.b, z2.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsr z0.d, p0/m, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsr z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsr z0.d, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsr z0.h, p0/m, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsr z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsr z0.h, p0/m, z0.h, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsr z0.h, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsr z0.h, z1.h, z2.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsr z0.s, p0/m, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsr z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsr z0.s, p0/m, z0.s, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsr z0.s, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsr z0.s, z1.s, z2.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsr z31.b, p0/m, z31.b, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsr z31.b, z31.b, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsr z31.d, p0/m, z31.d, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsr z31.d, z31.d, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsr z31.h, p0/m, z31.h, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsr z31.h, z31.h, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsr z31.s, p0/m, z31.s, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsr z31.s, z31.s, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsrr z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsrr z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsrr z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 lsrr z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - mad z0.b, p7/m, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - mad z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - mad z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - mad z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - 1.00 - - - match p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - 1.00 - - - match p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - 1.00 - - - match p15.b, p7/z, z30.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - 1.00 - - - match p15.h, p7/z, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - mla z0.b, p7/m, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - mla z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - mla z0.d, z1.d, z7.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - mla z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - mla z0.h, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - mla z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - mla z0.s, z1.s, z7.s[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - mls z0.b, p7/m, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - mls z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - mls z0.d, z1.d, z7.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - mls z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - mls z0.h, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - mls z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - mls z0.s, z1.s, z7.s[3]
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - mov p0.b, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - mov p0.b, p0/m, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - mov p0.b, p0/z, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - mov p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - mov p15.b, p15/m, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - mov p15.b, p15/z, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z0.b, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z0.b, b0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z0.b, p0/m, b0
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.25 0.25 0.25 0.25 mov z0.b, p0/m, w0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z0.b, p0/z, #127
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - mov z0.b, w0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z0.d, #0xe0000000000003ff
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z0.d, #0xffffffffffff7fff
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z0.d, #32768
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z0.d, d0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z0.d, p0/m, d0
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.25 0.25 0.25 0.25 mov z0.d, p0/m, x0
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - mov z0.d, x0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z0.h, #-256
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z0.h, #-32768
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z0.h, #32512
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z0.h, #32767
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z0.h, h0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z0.h, p0/m, h0
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.25 0.25 0.25 0.25 mov z0.h, p0/m, w0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z0.h, p0/z, #32512
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - mov z0.h, w0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z0.q, q0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z0.s, #0xffff7fff
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z0.s, #32768
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z0.s, p0/m, s0
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.25 0.25 0.25 0.25 mov z0.s, p0/m, w0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z0.s, s0
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - mov z0.s, w0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z21.d, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z21.d, #-32768
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z21.d, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z21.d, #32512
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z21.d, p0/z, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z21.d, p0/z, #-32768
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z21.d, p0/z, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z21.d, p0/z, #32512
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z21.d, p15/m, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z21.d, p15/m, #-32768
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z21.h, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z21.h, #-32768
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z21.h, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z21.h, #32512
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z21.h, p0/z, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z21.h, p0/z, #-32768
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z21.h, p0/z, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z21.h, p0/z, #32512
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z21.h, p15/m, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z21.h, p15/m, #-32768
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z21.s, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z21.s, #-32768
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z21.s, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z21.s, #32512
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z21.s, p0/z, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z21.s, p0/z, #-32768
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z21.s, p0/z, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z21.s, p0/z, #32512
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z21.s, p15/m, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z21.s, p15/m, #-32768
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z31.b, p15/m, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z31.b, p7/m, b31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 movprfx z31, z6
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.25 0.25 0.25 0.25 mov z31.b, p7/m, wsp
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - mov z31.b, wsp
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z31.b, z31.b[63]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z31.d, p15/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z31.d, p7/m, d31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 movprfx z31.d, p7/z, z6.d
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.25 0.25 0.25 0.25 mov z31.d, p7/m, sp
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - mov z31.d, sp
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z31.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z31.d, z31.d[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z31.h, p15/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z31.h, p7/m, h31
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.25 0.25 0.25 0.25 mov z31.h, p7/m, wsp
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - mov z31.h, wsp
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z31.h, z31.h[31]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z31.s, p15/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z31.s, p7/m, s31
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.25 0.25 0.25 0.25 mov z31.s, p7/m, wsp
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - mov z31.s, wsp
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z31.s, z31.s[15]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z5.b, #-1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z5.b, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z5.b, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z5.b, p0/z, #-1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z5.b, p0/z, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z5.b, p0/z, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z5.b, p15/m, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z5.d, #-6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z5.h, #-6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z5.q, z17.q[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 mov z5.s, #-6
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - movs p0.b, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - movs p0.b, p0/z, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - movs p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - movs p15.b, p15/z, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - - - mrs x3, ID_AA64ZFR0_EL1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - - - mrs x3, ZCR_EL1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - - - mrs x3, ZCR_EL12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - - - mrs x3, ZCR_EL2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - - - mrs x3, ZCR_EL3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - msb z0.b, p7/m, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - msb z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - msb z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - msb z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - - - msr ZCR_EL1, x3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - - - msr ZCR_EL12, x3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - - - msr ZCR_EL2, x3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - - - msr ZCR_EL3, x3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - mul z0.b, p7/m, z0.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - mul z0.b, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - mul z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - mul z0.d, z1.d, z15.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - mul z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - mul z0.h, z1.h, z2.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - mul z0.h, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - mul z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - mul z0.s, z1.s, z7.s[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - mul z29.s, z30.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - mul z31.b, z31.b, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - mul z31.b, z31.b, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - mul z31.d, z31.d, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - mul z31.d, z31.d, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - mul z31.d, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - mul z31.h, z31.h, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - mul z31.h, z31.h, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - mul z31.s, z31.s, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - mul z31.s, z31.s, #127
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - nand p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - nand p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - nands p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - nands p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 nbsl z0.d, z0.d, z1.d, z2.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 neg z0.b, p0/m, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 neg z0.d, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 neg z0.h, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 neg z0.s, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 neg z31.b, p7/m, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 neg z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 neg z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 neg z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - 1.00 - - - nmatch p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - 1.00 - - - nmatch p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - 1.00 - - - nmatch p15.b, p7/z, z30.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - 1.00 - - - nmatch p15.h, p7/z, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - nor p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - nor p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - nors p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - nors p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - not p0.b, p0/z, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - not p15.b, p15/z, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 not z31.b, p7/m, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 not z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 not z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 not z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - nots p0.b, p0/z, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - nots p15.b, p15/z, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - orn p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - orn p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - orns p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - orns p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - orr p0.b, p0/z, p0.b, p1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 orr z0.d, z0.d, #0x6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 orr z0.d, z0.d, #0xfffffffffffffff9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 orr z0.s, z0.s, #0x6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 orr z0.s, z0.s, #0xfffffff9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 orr z23.d, z13.d, z8.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 orr z23.h, z23.h, #0x6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 orr z23.h, z23.h, #0xfff9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 orr z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 orr z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 orr z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 orr z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 orr z5.b, z5.b, #0x6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 orr z5.b, z5.b, #0xf9
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - orrs p0.b, p0/z, p0.b, p1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.75 0.25 0.75 orv b0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.75 0.25 0.75 orv d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.75 0.25 0.75 orv h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.75 0.25 0.75 orv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - pfalse p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - pfirst p0.b, p15, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - pfirst p15.b, p15, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 pmul z0.b, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 pmul z29.b, z30.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 pmullb z0.h, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 pmullb z29.q, z30.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 pmullb z31.d, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 pmullt z0.h, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 pmullt z29.q, z30.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 pmullt z31.d, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - pnext p0.b, p15, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - pnext p0.d, p15, p0.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - pnext p0.h, p15, p0.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - pnext p0.s, p15, p0.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - pnext p15.b, p15, p15.b
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfb #14, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfb #15, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfb #6, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfb #7, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfb #7, p3, [z13.s, #31]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfb #7, p3, [z13.s]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfb pldl1keep, p0, [x0, z0.d, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfb pldl1keep, p0, [x0, z0.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfb pldl1keep, p0, [x0, z0.s, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfb pldl1keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfb pldl1strm, p0, [x0, #-32, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfb pldl1strm, p0, [x0, #31, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfb pldl1strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfb pldl2keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfb pldl2strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfb pldl3keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfb pldl3strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfb pldl3strm, p5, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfb pldl3strm, p5, [x10, z21.s, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfb pldl3strm, p5, [z10.d, #31]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfb pldl3strm, p5, [z10.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfb pstl1keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfb pstl1strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfb pstl2keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfb pstl2strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfb pstl3keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfb pstl3strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfd #14, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfd #15, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfd #15, p7, [z31.d, #248]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfd #15, p7, [z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfd #15, p7, [z31.s, #248]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfd #15, p7, [z31.s]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfd #6, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfd #7, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfd pldl1keep, p0, [x0, z0.d, lsl #3]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfd pldl1keep, p0, [x0, z0.d, sxtw #3]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfd pldl1keep, p0, [x0, z0.d, uxtw #3]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfd pldl1keep, p0, [x0, z0.s, sxtw #3]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfd pldl1keep, p0, [x0, z0.s, uxtw #3]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfd pldl1keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfd pldl1strm, p0, [x0, #-32, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfd pldl1strm, p0, [x0, #31, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfd pldl1strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfd pldl2keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfd pldl2strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfd pldl3keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfd pldl3strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfd pstl1keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfd pstl1strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfd pstl2keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfd pstl2strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfd pstl3keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfd pstl3strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfh #14, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfh #15, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfh #15, p7, [z31.d, #62]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfh #15, p7, [z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfh #15, p7, [z31.s, #62]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfh #15, p7, [z31.s]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfh #6, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfh #7, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfh pldl1keep, p0, [x0, z0.d, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfh pldl1keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfh pldl1strm, p0, [x0, #-32, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfh pldl1strm, p0, [x0, #31, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfh pldl1strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfh pldl2keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfh pldl2strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfh pldl3keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfh pldl3strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfh pldl3strm, p5, [x10, z21.d, sxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfh pldl3strm, p5, [x10, z21.d, uxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfh pldl3strm, p5, [x10, z21.s, sxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfh pldl3strm, p5, [x10, z21.s, uxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfh pstl1keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfh pstl1strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfh pstl2keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfh pstl2strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfh pstl3keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfh pstl3strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfw #14, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfw #15, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfw #15, p7, [z31.d, #124]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfw #15, p7, [z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfw #15, p7, [z31.s, #124]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfw #15, p7, [z31.s]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfw #6, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfw #7, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfw #7, p3, [x13, z8.d, uxtw #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfw pldl1keep, p0, [x0, z0.d, sxtw #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfw pldl1keep, p0, [x0, z0.s, uxtw #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfw pldl1keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfw pldl1strm, p0, [x0, #-32, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfw pldl1strm, p0, [x0, #31, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfw pldl1strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfw pldl2keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfw pldl2strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfw pldl3keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfw pldl3strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfw pldl3strm, p5, [x10, z21.d, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfw pldl3strm, p5, [x10, z21.s, sxtw #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfw pstl1keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfw pstl1strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfw pstl2keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfw pstl2strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfw pstl3keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - - - prfw pstl3strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptest p15, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptest p15, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrue p0.b, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrue p0.d, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrue p0.h, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrue p0.s, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrue p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrue p15.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrue p15.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrue p15.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrue p7.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrue p7.s, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrue p7.s, #15
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrue p7.s, #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrue p7.s, #17
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrue p7.s, #18
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrue p7.s, #19
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrue p7.s, #20
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrue p7.s, #21
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrue p7.s, #22
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrue p7.s, #23
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrue p7.s, #24
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrue p7.s, #25
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrue p7.s, #26
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrue p7.s, #27
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrue p7.s, #28
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrue p7.s, mul3
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrue p7.s, mul4
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrue p7.s, vl1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrue p7.s, vl128
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrue p7.s, vl16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrue p7.s, vl2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrue p7.s, vl256
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrue p7.s, vl3
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrue p7.s, vl32
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrue p7.s, vl4
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrue p7.s, vl5
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrue p7.s, vl6
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrue p7.s, vl64
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrue p7.s, vl7
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrue p7.s, vl8
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrues p0.b, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrues p0.d, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrues p0.h, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrues p0.s, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrues p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrues p15.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrues p15.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrues p15.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrues p7.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrues p7.s, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrues p7.s, #15
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrues p7.s, #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrues p7.s, #17
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrues p7.s, #18
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrues p7.s, #19
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrues p7.s, #20
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrues p7.s, #21
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrues p7.s, #22
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrues p7.s, #23
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrues p7.s, #24
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrues p7.s, #25
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrues p7.s, #26
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrues p7.s, #27
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrues p7.s, #28
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrues p7.s, mul3
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrues p7.s, mul4
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrues p7.s, vl1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrues p7.s, vl128
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrues p7.s, vl16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrues p7.s, vl2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrues p7.s, vl256
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrues p7.s, vl3
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrues p7.s, vl32
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrues p7.s, vl4
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrues p7.s, vl5
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrues p7.s, vl6
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrues p7.s, vl64
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrues p7.s, vl7
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - ptrues p7.s, vl8
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - punpkhi p0.h, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - punpkhi p15.h, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - punpklo p0.h, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - punpklo p15.h, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 raddhnb z0.b, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 raddhnb z0.h, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 raddhnb z0.s, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 raddhnt z0.b, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 raddhnt z0.h, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 raddhnt z0.s, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rax1 z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rbit z0.b, p7/m, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rbit z0.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rbit z0.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rbit z0.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - rdffr p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - - - - - rdffr p0.b, p0/z
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - rdffr p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - - - - - rdffr p15.b, p15/z
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - - - - - rdffrs p0.b, p0/z
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - - - - - rdffrs p15.b, p15/z
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - rdvl x0, #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - rdvl x21, #-32
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - rdvl x23, #31
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - rdvl xzr, #-1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - rev p0.b, p1.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - rev p0.d, p1.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - rev p0.h, p1.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - rev p0.s, p1.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rev z0.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rev z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rev z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rev z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 revb z0.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 revb z0.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 revb z0.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 revh z0.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 revh z0.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 revw z0.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rshrnb z0.b, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rshrnb z0.h, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rshrnb z0.s, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rshrnb z31.b, z31.h, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rshrnb z31.h, z31.s, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rshrnb z31.s, z31.d, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rshrnt z0.b, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rshrnt z0.h, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rshrnt z0.s, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rshrnt z31.b, z31.h, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rshrnt z31.h, z31.s, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rshrnt z31.s, z31.d, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rsubhnb z0.b, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rsubhnb z0.h, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rsubhnb z0.s, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rsubhnt z0.b, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rsubhnt z0.h, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 rsubhnt z0.s, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 saba z0.b, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 saba z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 saba z0.h, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 saba z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sabalb z0.d, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sabalb z0.h, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sabalb z0.s, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sabalt z0.d, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sabalt z0.h, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sabalt z0.s, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sabd z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sabd z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sabd z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sabd z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sabdlb z0.h, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sabdlb z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sabdlb z31.d, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sabdlt z0.h, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sabdlt z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sabdlt z31.d, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sadalp z0.h, p0/m, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sadalp z29.s, p0/m, z30.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sadalp z30.d, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 saddlb z0.h, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 saddlb z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 saddlb z31.d, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 saddlbt z0.d, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 saddlbt z0.h, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 saddlbt z0.s, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 saddlt z0.h, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 saddlt z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 saddlt z31.d, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 2.50 0.50 2.50 saddv d0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 1.50 0.50 1.50 saddv d0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 1.50 0.50 1.50 saddv d0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 saddwb z0.h, z1.h, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 saddwb z29.s, z30.s, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 saddwb z31.d, z31.d, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 saddwt z0.h, z1.h, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 saddwt z29.s, z30.s, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 saddwt z31.d, z31.d, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sbclb z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sbclb z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sbclt z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sbclt z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - scvtf z0.d, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - scvtf z0.d, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - scvtf z0.h, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - scvtf z0.h, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - scvtf z0.h, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - scvtf z0.s, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - scvtf z0.s, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 20.00 - - - sdiv z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 11.00 - - - sdiv z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 20.00 - - - sdivr z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 11.00 - - - sdivr z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sdot z0.d, z1.h, z15.h[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sdot z0.d, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sdot z0.s, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sdot z0.s, z1.b, z7.b[3]
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sel p0.b, p1, p2.b, p3.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sel z23.b, p11, z13.b, z8.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sel z23.d, p11, z13.d, z8.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sel z23.h, p11, z13.h, z8.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sel z23.s, p11, z13.s, z8.s
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - setffr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shadd z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shadd z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shadd z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shadd z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shrnb z0.b, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shrnb z0.h, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shrnb z0.s, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shrnb z31.b, z31.h, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shrnb z31.h, z31.s, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shrnb z31.s, z31.d, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shrnt z0.b, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shrnt z0.h, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shrnt z0.s, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shrnt z31.b, z31.h, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shrnt z31.h, z31.s, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shrnt z31.s, z31.d, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shsub z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shsub z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shsub z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shsub z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shsubr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shsubr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shsubr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 shsubr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sli z0.b, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sli z0.d, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sli z0.h, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sli z0.s, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sli z31.b, z31.b, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sli z31.d, z31.d, #63
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sli z31.h, z31.h, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sli z31.s, z31.s, #31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - sm4e z0.s, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - - - sm4ekey z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smax z0.b, z0.b, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smax z0.d, z0.d, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smax z0.h, z0.h, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smax z0.s, z0.s, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smax z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smax z31.b, z31.b, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smax z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smax z31.d, z31.d, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smax z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smax z31.h, z31.h, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smax z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smax z31.s, z31.s, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smaxp z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smaxp z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smaxp z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smaxp z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 2.50 0.50 2.50 smaxv b0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 smaxv d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 1.50 0.50 1.50 smaxv h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 1.50 0.50 1.50 smaxv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smin z0.b, z0.b, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smin z0.d, z0.d, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smin z0.h, z0.h, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smin z0.s, z0.s, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smin z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smin z31.b, z31.b, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smin z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smin z31.d, z31.d, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smin z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smin z31.h, z31.h, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smin z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smin z31.s, z31.s, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sminp z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sminp z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sminp z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sminp z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 2.50 0.50 2.50 sminv b0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 sminv d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 1.50 0.50 1.50 sminv h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 1.50 0.50 1.50 sminv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smlalb z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smlalb z0.d, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smlalb z0.h, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smlalb z0.s, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smlalb z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smlalt z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smlalt z0.d, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smlalt z0.h, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smlalt z0.s, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smlalt z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smlslb z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smlslb z0.d, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smlslb z0.h, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smlslb z0.s, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smlslb z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smlslt z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smlslt z0.d, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smlslt z0.h, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smlslt z0.s, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smlslt z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 smmla z0.s, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smulh z0.b, p7/m, z0.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smulh z0.b, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - smulh z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smulh z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smulh z0.h, z1.h, z2.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smulh z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smulh z29.s, z30.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - smulh z31.d, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smullb z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smullb z0.h, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smullb z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smullb z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smullb z31.d, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smullt z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smullt z0.h, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smullt z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smullt z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - smullt z31.d, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - - splice z29.b, p7, { z30.b, z31.b }
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - - splice z29.d, p7, { z30.d, z31.d }
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - - splice z29.h, p7, { z30.h, z31.h }
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - - splice z29.s, p7, { z30.s, z31.s }
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - - splice z31.b, p7, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - - splice z31.d, p7, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - - splice z31.h, p7, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 - - splice z31.s, p7, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqabs z31.b, p7/m, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqabs z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqabs z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqabs z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqadd z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqadd z0.b, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqadd z0.b, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqadd z0.d, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqadd z0.d, z0.d, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqadd z0.d, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqadd z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqadd z0.h, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqadd z0.h, z0.h, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqadd z0.h, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqadd z0.s, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqadd z0.s, z0.s, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqadd z0.s, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqadd z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqadd z31.b, z31.b, #255
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqadd z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqadd z31.d, z31.d, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqadd z31.h, z31.h, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqadd z31.s, z31.s, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqcadd z0.b, z0.b, z0.b, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqcadd z0.d, z0.d, z0.d, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqcadd z0.h, z0.h, z0.h, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqcadd z0.s, z0.s, z0.s, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqcadd z31.b, z31.b, z31.b, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqcadd z31.d, z31.d, z31.d, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqcadd z31.h, z31.h, z31.h, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqcadd z31.s, z31.s, z31.s, #270
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdecb x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdecb x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdecb x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdecb x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdecb x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdecb x0, w0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdecb x0, w0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdecb x0, w0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdecb x0, w0, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdecd x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdecd x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdecd x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdecd x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdecd x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdecd x0, w0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdecd x0, w0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdecd x0, w0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdecd x0, w0, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqdecd z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqdecd z0.d, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqdecd z0.d, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqdecd z0.d, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdech x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdech x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdech x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdech x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdech x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdech x0, w0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdech x0, w0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdech x0, w0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdech x0, w0, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqdech z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqdech z0.h, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqdech z0.h, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqdech z0.h, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdecp x0, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdecp x0, p0.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdecp x0, p0.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdecp x0, p0.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdecp xzr, p15.b, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdecp xzr, p15.d, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdecp xzr, p15.h, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdecp xzr, p15.s, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - 0.25 0.25 0.25 0.25 sqdecp z0.d, p0.d
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - 0.25 0.25 0.25 0.25 sqdecp z0.h, p0.h
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - 0.25 0.25 0.25 0.25 sqdecp z0.s, p0.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdecw x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdecw x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdecw x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdecw x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdecw x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdecw x0, w0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdecw x0, w0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdecw x0, w0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqdecw x0, w0, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqdecw z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqdecw z0.s, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqdecw z0.s, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqdecw z0.s, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmlalb z0.d, z1.s, z15.s[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmlalb z0.d, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmlalb z0.h, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmlalb z0.s, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmlalb z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmlalbt z0.d, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmlalbt z0.h, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmlalbt z0.s, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmlalt z0.d, z1.s, z15.s[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmlalt z0.d, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmlalt z0.h, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmlalt z0.s, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmlalt z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmlslb z0.d, z1.s, z15.s[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmlslb z0.d, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmlslb z0.h, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmlslb z0.s, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmlslb z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmlslbt z0.d, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmlslbt z0.h, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmlslbt z0.s, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmlslt z0.d, z1.s, z15.s[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmlslt z0.d, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmlslt z0.h, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmlslt z0.s, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmlslt z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmulh z0.b, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - sqdmulh z0.d, z1.d, z15.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmulh z0.h, z1.h, z2.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmulh z0.h, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmulh z0.s, z1.s, z7.s[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmulh z29.s, z30.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - sqdmulh z31.d, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmullb z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmullb z0.h, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmullb z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmullb z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmullb z31.d, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmullt z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmullt z0.h, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmullt z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmullt z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqdmullt z31.d, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqincb x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqincb x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqincb x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqincb x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqincb x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqincb x0, w0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqincb x0, w0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqincb x0, w0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqincb x0, w0, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqincd x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqincd x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqincd x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqincd x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqincd x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqincd x0, w0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqincd x0, w0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqincd x0, w0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqincd x0, w0, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqincd z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqincd z0.d, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqincd z0.d, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqincd z0.d, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqinch x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqinch x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqinch x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqinch x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqinch x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqinch x0, w0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqinch x0, w0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqinch x0, w0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqinch x0, w0, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqinch z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqinch z0.h, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqinch z0.h, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqinch z0.h, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqincp x0, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqincp x0, p0.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqincp x0, p0.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqincp x0, p0.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqincp xzr, p15.b, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqincp xzr, p15.d, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqincp xzr, p15.h, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqincp xzr, p15.s, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - 0.25 0.25 0.25 0.25 sqincp z0.d, p0.d
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - 0.25 0.25 0.25 0.25 sqincp z0.h, p0.h
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - 0.25 0.25 0.25 0.25 sqincp z0.s, p0.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqincw x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqincw x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqincw x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqincw x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqincw x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqincw x0, w0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqincw x0, w0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqincw x0, w0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - sqincw x0, w0, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqincw z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqincw z0.s, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqincw z0.s, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqincw z0.s, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqneg z31.b, p7/m, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqneg z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqneg z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqneg z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqrdcmlah z0.b, z1.b, z2.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - sqrdcmlah z0.d, z1.d, z2.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqrdcmlah z0.h, z1.h, z2.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqrdcmlah z0.h, z1.h, z2.h[0], #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqrdcmlah z0.s, z1.s, z2.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqrdcmlah z0.s, z1.s, z2.s[0], #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqrdcmlah z15.b, z16.b, z17.b, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - sqrdcmlah z15.d, z16.d, z17.d, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqrdcmlah z15.h, z16.h, z17.h, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqrdcmlah z15.s, z16.s, z17.s, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqrdcmlah z29.b, z30.b, z31.b, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - sqrdcmlah z29.d, z30.d, z31.d, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqrdcmlah z29.h, z30.h, z31.h, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqrdcmlah z29.s, z30.s, z31.s, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqrdcmlah z31.b, z31.b, z31.b, #180
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - sqrdcmlah z31.d, z31.d, z31.d, #180
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqrdcmlah z31.h, z30.h, z7.h[0], #180
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqrdcmlah z31.h, z31.h, z31.h, #180
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqrdcmlah z31.s, z30.s, z7.s[0], #180
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqrdcmlah z31.s, z31.s, z31.s, #180
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqrdmlah z0.b, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - sqrdmlah z0.d, z1.d, z15.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - sqrdmlah z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqrdmlah z0.h, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqrdmlah z0.h, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqrdmlah z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqrdmlah z0.s, z1.s, z7.s[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqrdmlsh z0.b, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - sqrdmlsh z0.d, z1.d, z15.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - sqrdmlsh z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqrdmlsh z0.h, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqrdmlsh z0.h, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqrdmlsh z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqrdmlsh z0.s, z1.s, z7.s[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqrdmulh z0.b, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - sqrdmulh z0.d, z1.d, z15.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqrdmulh z0.h, z1.h, z2.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqrdmulh z0.h, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqrdmulh z0.s, z1.s, z7.s[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - sqrdmulh z29.s, z30.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - sqrdmulh z31.d, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshl z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshl z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshl z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshl z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshlr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshlr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshlr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshlr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrnb z0.b, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrnb z0.h, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrnb z0.s, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrnb z31.b, z31.h, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrnb z31.h, z31.s, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrnb z31.s, z31.d, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrnt z0.b, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrnt z0.h, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrnt z0.s, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrnt z31.b, z31.h, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrnt z31.h, z31.s, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrnt z31.s, z31.d, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrunb z0.b, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrunb z0.h, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrunb z0.s, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrunb z31.b, z31.h, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrunb z31.h, z31.s, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrunb z31.s, z31.d, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrunt z0.b, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrunt z0.h, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrunt z0.s, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrunt z31.b, z31.h, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrunt z31.h, z31.s, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqrshrunt z31.s, z31.d, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshl z0.b, p0/m, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshl z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshl z0.d, p0/m, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshl z0.h, p0/m, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshl z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshl z0.s, p0/m, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshl z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshl z31.b, p0/m, z31.b, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshl z31.d, p0/m, z31.d, #63
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshl z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshl z31.h, p0/m, z31.h, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshl z31.s, p0/m, z31.s, #31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshlr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshlr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshlr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshlr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshlu z0.b, p0/m, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshlu z0.d, p0/m, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshlu z0.h, p0/m, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshlu z0.s, p0/m, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshlu z31.b, p0/m, z31.b, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshlu z31.d, p0/m, z31.d, #63
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshlu z31.h, p0/m, z31.h, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshlu z31.s, p0/m, z31.s, #31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrnb z0.b, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrnb z0.h, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrnb z0.s, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrnb z31.b, z31.h, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrnb z31.h, z31.s, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrnb z31.s, z31.d, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrnt z0.b, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrnt z0.h, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrnt z0.s, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrnt z31.b, z31.h, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrnt z31.h, z31.s, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrnt z31.s, z31.d, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrunb z0.b, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrunb z0.h, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrunb z0.s, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrunb z31.b, z31.h, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrunb z31.h, z31.s, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrunb z31.s, z31.d, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrunt z0.b, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrunt z0.h, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrunt z0.s, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrunt z31.b, z31.h, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrunt z31.h, z31.s, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqshrunt z31.s, z31.d, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqsub z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqsub z0.b, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqsub z0.b, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqsub z0.d, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqsub z0.d, z0.d, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqsub z0.d, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqsub z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqsub z0.h, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqsub z0.h, z0.h, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqsub z0.h, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqsub z0.s, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqsub z0.s, z0.s, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqsub z0.s, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqsub z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqsub z31.b, z31.b, #255
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqsub z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqsub z31.d, z31.d, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqsub z31.h, z31.h, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqsub z31.s, z31.s, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqsubr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqsubr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqsubr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqsubr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqxtnb z0.b, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqxtnb z0.h, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqxtnb z0.s, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqxtnt z0.b, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqxtnt z0.h, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqxtnt z0.s, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqxtunb z0.b, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqxtunb z0.h, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqxtunb z0.s, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqxtunt z0.b, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqxtunt z0.h, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sqxtunt z0.s, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srhadd z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srhadd z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srhadd z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srhadd z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sri z0.b, z0.b, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sri z0.d, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sri z0.h, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sri z0.s, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sri z31.b, z31.b, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sri z31.d, z31.d, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sri z31.h, z31.h, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sri z31.s, z31.s, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srshl z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srshl z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srshl z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srshl z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srshlr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srshlr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srshlr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srshlr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srshr z0.b, p0/m, z0.b, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srshr z0.d, p0/m, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srshr z0.h, p0/m, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srshr z0.s, p0/m, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srshr z31.b, p0/m, z31.b, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srshr z31.d, p0/m, z31.d, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srshr z31.h, p0/m, z31.h, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 srshr z31.s, p0/m, z31.s, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 srsra z0.b, z0.b, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 srsra z0.d, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 srsra z0.h, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 srsra z0.s, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 srsra z31.b, z31.b, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 srsra z31.d, z31.d, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 srsra z31.h, z31.h, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 srsra z31.s, z31.s, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sshllb z0.d, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sshllb z0.h, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sshllb z0.s, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sshllb z31.d, z31.s, #31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sshllb z31.h, z31.b, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sshllb z31.s, z31.h, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sshllt z0.d, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sshllt z0.h, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sshllt z0.s, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sshllt z31.d, z31.s, #31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sshllt z31.h, z31.b, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sshllt z31.s, z31.h, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 ssra z0.b, z0.b, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 ssra z0.d, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 ssra z0.h, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 ssra z0.s, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 ssra z31.b, z31.b, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 ssra z31.d, z31.d, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 ssra z31.h, z31.h, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 ssra z31.s, z31.s, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ssublb z0.h, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ssublb z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ssublb z31.d, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ssublbt z0.d, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ssublbt z0.h, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ssublbt z0.s, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ssublt z0.h, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ssublt z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ssublt z31.d, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ssubltb z0.d, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ssubltb z0.h, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ssubltb z0.s, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ssubwb z0.h, z1.h, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ssubwb z29.s, z30.s, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ssubwb z31.d, z31.d, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ssubwt z0.h, z1.h, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ssubwt z29.s, z30.s, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ssubwt z31.d, z31.d, z31.s
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st1b { z0.b }, p0, [x0, x0]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st1b { z0.b }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st1b { z0.d }, p0, [x0, x0]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 - - st1b { z0.d }, p0, [x0, z0.d, sxtw]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 - - st1b { z0.d }, p0, [x0, z0.d, uxtw]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 - - st1b { z0.d }, p0, [x0, z0.d]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st1b { z0.d }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 - - st1b { z0.d }, p7, [z0.d]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st1b { z0.h }, p0, [x0, x0]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st1b { z0.h }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st1b { z0.s }, p0, [x0, x0]
+# CHECK-NEXT: - - - - - - - - - - - 3.00 - - - - - - - - 3.00 3.00 3.00 - - st1b { z0.s }, p0, [x0, z0.s, sxtw]
+# CHECK-NEXT: - - - - - - - - - - - 3.00 - - - - - - - - 3.00 3.00 3.00 - - st1b { z0.s }, p0, [x0, z0.s, uxtw]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st1b { z0.s }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 3.00 - - - - - - - - 3.00 3.00 3.00 - - st1b { z0.s }, p7, [z0.s]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st1b { z21.b }, p5, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st1b { z21.d }, p5, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st1b { z21.h }, p5, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st1b { z21.s }, p5, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st1b { z31.b }, p7, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st1b { z31.d }, p7, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 - - st1b { z31.d }, p7, [z31.d, #31]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st1b { z31.h }, p7, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st1b { z31.s }, p7, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 3.00 - - - - - - - - 3.00 3.00 3.00 - - st1b { z31.s }, p7, [z31.s, #31]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st1d { z0.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 - - st1d { z0.d }, p0, [x0, z0.d, lsl #3]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 - - st1d { z0.d }, p0, [x0, z0.d, sxtw #3]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 - - st1d { z0.d }, p0, [x0, z0.d, sxtw]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 - - st1d { z0.d }, p0, [x0, z0.d, uxtw #3]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 - - st1d { z0.d }, p0, [x0, z0.d, uxtw]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 - - st1d { z0.d }, p0, [x0, z0.d]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st1d { z0.d }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 - - st1d { z0.d }, p7, [z0.d]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st1d { z21.d }, p5, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st1d { z31.d }, p7, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 - - st1d { z31.d }, p7, [z31.d, #248]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 0.50 0.50 - - st1h { z0.d }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 - - st1h { z0.d }, p0, [x0, z0.d, lsl #1]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 - - st1h { z0.d }, p0, [x0, z0.d, sxtw #1]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 - - st1h { z0.d }, p0, [x0, z0.d, sxtw]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 - - st1h { z0.d }, p0, [x0, z0.d, uxtw #1]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 - - st1h { z0.d }, p0, [x0, z0.d, uxtw]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 - - st1h { z0.d }, p0, [x0, z0.d]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st1h { z0.d }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 - - st1h { z0.d }, p7, [z0.d]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 0.50 0.50 - - st1h { z0.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st1h { z0.h }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 0.50 0.50 - - st1h { z0.s }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - - - - 3.00 - - - - - - - - 3.00 3.00 3.00 - - st1h { z0.s }, p0, [x0, z0.s, sxtw #1]
+# CHECK-NEXT: - - - - - - - - - - - 3.00 - - - - - - - - 3.00 3.00 3.00 - - st1h { z0.s }, p0, [x0, z0.s, sxtw]
+# CHECK-NEXT: - - - - - - - - - - - 3.00 - - - - - - - - 3.00 3.00 3.00 - - st1h { z0.s }, p0, [x0, z0.s, uxtw #1]
+# CHECK-NEXT: - - - - - - - - - - - 3.00 - - - - - - - - 3.00 3.00 3.00 - - st1h { z0.s }, p0, [x0, z0.s, uxtw]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st1h { z0.s }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 3.00 - - - - - - - - 3.00 3.00 3.00 - - st1h { z0.s }, p7, [z0.s]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st1h { z21.d }, p5, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st1h { z21.h }, p5, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st1h { z21.s }, p5, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st1h { z31.d }, p7, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 - - st1h { z31.d }, p7, [z31.d, #62]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st1h { z31.h }, p7, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st1h { z31.s }, p7, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 3.00 - - - - - - - - 3.00 3.00 3.00 - - st1h { z31.s }, p7, [z31.s, #62]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st1w { z0.d }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 - - st1w { z0.d }, p0, [x0, z0.d, lsl #2]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 - - st1w { z0.d }, p0, [x0, z0.d, sxtw #2]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 - - st1w { z0.d }, p0, [x0, z0.d, sxtw]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 - - st1w { z0.d }, p0, [x0, z0.d, uxtw #2]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 - - st1w { z0.d }, p0, [x0, z0.d, uxtw]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 - - st1w { z0.d }, p0, [x0, z0.d]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st1w { z0.d }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 - - st1w { z0.d }, p7, [z0.d]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st1w { z0.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: - - - - - - - - - - - 3.00 - - - - - - - - 3.00 3.00 3.00 - - st1w { z0.s }, p0, [x0, z0.s, sxtw #2]
+# CHECK-NEXT: - - - - - - - - - - - 3.00 - - - - - - - - 3.00 3.00 3.00 - - st1w { z0.s }, p0, [x0, z0.s, sxtw]
+# CHECK-NEXT: - - - - - - - - - - - 3.00 - - - - - - - - 3.00 3.00 3.00 - - st1w { z0.s }, p0, [x0, z0.s, uxtw #2]
+# CHECK-NEXT: - - - - - - - - - - - 3.00 - - - - - - - - 3.00 3.00 3.00 - - st1w { z0.s }, p0, [x0, z0.s, uxtw]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st1w { z0.s }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 3.00 - - - - - - - - 3.00 3.00 3.00 - - st1w { z0.s }, p7, [z0.s]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st1w { z21.d }, p5, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st1w { z21.s }, p5, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st1w { z31.d }, p7, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 - - st1w { z31.d }, p7, [z31.d, #124]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st1w { z31.s }, p7, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 3.00 - - - - - - - - 3.00 3.00 3.00 - - st1w { z31.s }, p7, [z31.s, #124]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 - - st2b { z0.b, z1.b }, p0, [x0, x0]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st2b { z0.b, z1.b }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st2b { z21.b, z22.b }, p5, [x10, #10, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st2b { z23.b, z24.b }, p3, [x13, #-16, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 - - st2b { z5.b, z6.b }, p3, [x17, x16]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 - - st2d { z0.d, z1.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st2d { z0.d, z1.d }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st2d { z21.d, z22.d }, p5, [x10, #10, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st2d { z23.d, z24.d }, p3, [x13, #-16, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 - - st2d { z5.d, z6.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 1.00 1.00 1.00 - - st2h { z0.h, z1.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st2h { z0.h, z1.h }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st2h { z21.h, z22.h }, p5, [x10, #10, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st2h { z23.h, z24.h }, p3, [x13, #-16, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 1.00 1.00 1.00 - - st2h { z5.h, z6.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 - - st2w { z0.s, z1.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st2w { z0.s, z1.s }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st2w { z21.s, z22.s }, p5, [x10, #10, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - st2w { z23.s, z24.s }, p3, [x13, #-16, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 - - st2w { z5.s, z6.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT: - - - - - - - - - - - 4.50 1.13 1.13 1.13 1.13 1.13 1.13 1.13 1.13 4.50 4.50 4.50 - - st3b { z0.b - z2.b }, p0, [x0, x0]
+# CHECK-NEXT: - - - - - - - - - - - 4.50 - - - - - - - - 4.50 4.50 4.50 - - st3b { z0.b - z2.b }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 4.50 - - - - - - - - 4.50 4.50 4.50 - - st3b { z21.b - z23.b }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 4.50 - - - - - - - - 4.50 4.50 4.50 - - st3b { z23.b - z25.b }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 4.50 1.13 1.13 1.13 1.13 1.13 1.13 1.13 1.13 4.50 4.50 4.50 - - st3b { z5.b - z7.b }, p3, [x17, x16]
+# CHECK-NEXT: - - - - - - - - - - - 4.50 1.13 1.13 1.13 1.13 1.13 1.13 1.13 1.13 4.50 4.50 4.50 - - st3d { z0.d - z2.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT: - - - - - - - - - - - 4.50 - - - - - - - - 4.50 4.50 4.50 - - st3d { z0.d - z2.d }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 4.50 - - - - - - - - 4.50 4.50 4.50 - - st3d { z21.d - z23.d }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 4.50 - - - - - - - - 4.50 4.50 4.50 - - st3d { z23.d - z25.d }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 4.50 1.13 1.13 1.13 1.13 1.13 1.13 1.13 1.13 4.50 4.50 4.50 - - st3d { z5.d - z7.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT: - - - - - - - - - - - 4.50 1.13 1.13 1.13 1.13 1.13 1.13 1.13 1.13 4.50 4.50 4.50 - - st3h { z0.h - z2.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - - - - 4.50 - - - - - - - - 4.50 4.50 4.50 - - st3h { z0.h - z2.h }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 4.50 - - - - - - - - 4.50 4.50 4.50 - - st3h { z21.h - z23.h }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 4.50 - - - - - - - - 4.50 4.50 4.50 - - st3h { z23.h - z25.h }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 4.50 1.13 1.13 1.13 1.13 1.13 1.13 1.13 1.13 4.50 4.50 4.50 - - st3h { z5.h - z7.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT: - - - - - - - - - - - 4.50 1.13 1.13 1.13 1.13 1.13 1.13 1.13 1.13 4.50 4.50 4.50 - - st3w { z0.s - z2.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: - - - - - - - - - - - 4.50 - - - - - - - - 4.50 4.50 4.50 - - st3w { z0.s - z2.s }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 4.50 - - - - - - - - 4.50 4.50 4.50 - - st3w { z21.s - z23.s }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 4.50 - - - - - - - - 4.50 4.50 4.50 - - st3w { z23.s - z25.s }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 4.50 1.13 1.13 1.13 1.13 1.13 1.13 1.13 1.13 4.50 4.50 4.50 - - st3w { z5.s - z7.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT: - - - - - - - - - - - 9.00 2.25 2.25 2.25 2.25 2.25 2.25 2.25 2.25 9.00 9.00 9.00 - - st4b { z0.b - z3.b }, p0, [x0, x0]
+# CHECK-NEXT: - - - - - - - - - - - 9.00 - - - - - - - - 9.00 9.00 9.00 - - st4b { z0.b - z3.b }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 9.00 - - - - - - - - 9.00 9.00 9.00 - - st4b { z21.b - z24.b }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 9.00 - - - - - - - - 9.00 9.00 9.00 - - st4b { z23.b - z26.b }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 9.00 2.25 2.25 2.25 2.25 2.25 2.25 2.25 2.25 9.00 9.00 9.00 - - st4b { z5.b - z8.b }, p3, [x17, x16]
+# CHECK-NEXT: - - - - - - - - - - - 9.00 2.25 2.25 2.25 2.25 2.25 2.25 2.25 2.25 9.00 9.00 9.00 - - st4d { z0.d - z3.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT: - - - - - - - - - - - 9.00 - - - - - - - - 9.00 9.00 9.00 - - st4d { z0.d - z3.d }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 9.00 - - - - - - - - 9.00 9.00 9.00 - - st4d { z21.d - z24.d }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 9.00 - - - - - - - - 9.00 9.00 9.00 - - st4d { z23.d - z26.d }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 9.00 2.25 2.25 2.25 2.25 2.25 2.25 2.25 2.25 9.00 9.00 9.00 - - st4d { z5.d - z8.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT: - - - - - - - - - - - 9.00 2.25 2.25 2.25 2.25 2.25 2.25 2.25 2.25 9.00 9.00 9.00 - - st4h { z0.h - z3.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - - - - 9.00 - - - - - - - - 9.00 9.00 9.00 - - st4h { z0.h - z3.h }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 9.00 - - - - - - - - 9.00 9.00 9.00 - - st4h { z21.h - z24.h }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 9.00 - - - - - - - - 9.00 9.00 9.00 - - st4h { z23.h - z26.h }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 9.00 2.25 2.25 2.25 2.25 2.25 2.25 2.25 2.25 9.00 9.00 9.00 - - st4h { z5.h - z8.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT: - - - - - - - - - - - 9.00 2.25 2.25 2.25 2.25 2.25 2.25 2.25 2.25 9.00 9.00 9.00 - - st4w { z0.s - z3.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: - - - - - - - - - - - 9.00 - - - - - - - - 9.00 9.00 9.00 - - st4w { z0.s - z3.s }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 9.00 - - - - - - - - 9.00 9.00 9.00 - - st4w { z21.s - z24.s }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 9.00 - - - - - - - - 9.00 9.00 9.00 - - st4w { z23.s - z26.s }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 9.00 2.25 2.25 2.25 2.25 2.25 2.25 2.25 2.25 9.00 9.00 9.00 - - st4w { z5.s - z8.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - stnt1b { z0.b }, p0, [x0, x0]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - stnt1b { z0.b }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 - - stnt1b { z0.d }, p0, [z1.d]
+# CHECK-NEXT: - - - - - - - - - - - 3.00 - - - - - - - - 3.00 3.00 3.00 - - stnt1b { z0.s }, p0, [z1.s]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - stnt1b { z21.b }, p5, [x10, #7, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - stnt1b { z23.b }, p3, [x13, #-8, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 - - stnt1b { z31.d }, p7, [z31.d, x0]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 - - stnt1b { z31.d }, p7, [z31.d]
+# CHECK-NEXT: - - - - - - - - - - - 3.00 - - - - - - - - 3.00 3.00 3.00 - - stnt1b { z31.s }, p7, [z31.s, x0]
+# CHECK-NEXT: - - - - - - - - - - - 3.00 - - - - - - - - 3.00 3.00 3.00 - - stnt1b { z31.s }, p7, [z31.s]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - stnt1d { z0.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - stnt1d { z0.d }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 - - stnt1d { z0.d }, p0, [z1.d]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - stnt1d { z21.d }, p5, [x10, #7, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - stnt1d { z23.d }, p3, [x13, #-8, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 - - stnt1d { z31.d }, p7, [z31.d, x0]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 - - stnt1d { z31.d }, p7, [z31.d]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 - - stnt1h { z0.d }, p0, [z1.d]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 0.50 0.50 - - stnt1h { z0.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - stnt1h { z0.h }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 3.00 - - - - - - - - 3.00 3.00 3.00 - - stnt1h { z0.s }, p0, [z1.s]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - stnt1h { z21.h }, p5, [x10, #7, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - stnt1h { z23.h }, p3, [x13, #-8, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 - - stnt1h { z31.d }, p7, [z31.d, x0]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 - - stnt1h { z31.d }, p7, [z31.d]
+# CHECK-NEXT: - - - - - - - - - - - 3.00 - - - - - - - - 3.00 3.00 3.00 - - stnt1h { z31.s }, p7, [z31.s, x0]
+# CHECK-NEXT: - - - - - - - - - - - 3.00 - - - - - - - - 3.00 3.00 3.00 - - stnt1h { z31.s }, p7, [z31.s]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 - - stnt1w { z0.d }, p0, [z1.d]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - stnt1w { z0.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - stnt1w { z0.s }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 3.00 - - - - - - - - 3.00 3.00 3.00 - - stnt1w { z0.s }, p0, [z1.s]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - stnt1w { z21.s }, p5, [x10, #7, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - stnt1w { z23.s }, p3, [x13, #-8, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 - - stnt1w { z31.d }, p7, [z31.d, x0]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 - - stnt1w { z31.d }, p7, [z31.d]
+# CHECK-NEXT: - - - - - - - - - - - 3.00 - - - - - - - - 3.00 3.00 3.00 - - stnt1w { z31.s }, p7, [z31.s, x0]
+# CHECK-NEXT: - - - - - - - - - - - 3.00 - - - - - - - - 3.00 3.00 3.00 - - stnt1w { z31.s }, p7, [z31.s]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 - - - - str p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 - - - - str p15, [sp, #-256, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 - - - - str p5, [x10, #255, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - str z0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - str z21, [x10, #-256, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 - - str z31, [sp, #255, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z0.b, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z0.b, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z0.d, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z0.d, z0.d, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z0.d, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z0.h, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z0.h, z0.h, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z0.h, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z0.s, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z0.s, z0.s, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z0.s, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z21.b, p5/m, z21.b, z10.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z21.b, z10.b, z21.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z21.d, p5/m, z21.d, z10.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z21.d, z10.d, z21.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z21.h, p5/m, z21.h, z10.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z21.h, z10.h, z21.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z21.s, p5/m, z21.s, z10.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z21.s, z10.s, z21.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z23.b, p3/m, z23.b, z13.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z23.b, z13.b, z8.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z23.d, p3/m, z23.d, z13.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z23.d, z13.d, z8.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z23.h, p3/m, z23.h, z13.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z23.h, z13.h, z8.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z23.s, p3/m, z23.s, z13.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z23.s, z13.s, z8.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z31.b, z31.b, #255
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z31.b, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z31.d, z31.d, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z31.d, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z31.h, z31.h, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z31.h, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z31.s, z31.s, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sub z31.s, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 subhnb z0.b, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 subhnb z0.h, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 subhnb z0.s, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 subhnt z0.b, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 subhnt z0.h, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 subhnt z0.s, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 subr z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 subr z0.b, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 subr z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 subr z0.d, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 subr z0.d, z0.d, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 subr z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 subr z0.h, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 subr z0.h, z0.h, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 subr z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 subr z0.s, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 subr z0.s, z0.s, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 subr z31.b, z31.b, #255
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 subr z31.d, z31.d, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 subr z31.h, z31.h, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 subr z31.s, z31.s, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sunpkhi z31.d, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sunpkhi z31.h, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sunpkhi z31.s, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sunpklo z31.d, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sunpklo z31.h, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sunpklo z31.s, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 suqadd z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 suqadd z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 suqadd z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 suqadd z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sxtb z0.d, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sxtb z0.h, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sxtb z0.s, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sxtb z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sxtb z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sxtb z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sxth z0.d, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sxth z0.s, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sxth z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sxth z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sxtw z0.d, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sxtw z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 tbl z28.b, { z29.b, z30.b }, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 tbl z28.d, { z29.d, z30.d }, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 tbl z28.h, { z29.h, z30.h }, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 tbl z28.s, { z29.s, z30.s }, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 tbl z31.b, { z31.b }, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 tbl z31.d, { z31.d }, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 tbl z31.h, { z31.h }, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 tbl z31.s, { z31.s }, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 tbx z31.b, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 tbx z31.d, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 tbx z31.h, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 tbx z31.s, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - trn1 p15.b, p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - trn1 p15.d, p15.d, p15.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - trn1 p15.h, p15.h, p15.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - trn1 p15.s, p15.s, p15.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 trn1 z31.b, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 trn1 z31.d, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 trn1 z31.h, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 trn1 z31.s, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - trn2 p15.b, p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - trn2 p15.d, p15.d, p15.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - trn2 p15.h, p15.h, p15.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - trn2 p15.s, p15.s, p15.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 trn2 z31.b, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 trn2 z31.d, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 trn2 z31.h, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 trn2 z31.s, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uaba z0.b, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uaba z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uaba z0.h, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uaba z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uabalb z0.d, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uabalb z0.h, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uabalb z0.s, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uabalt z0.d, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uabalt z0.h, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uabalt z0.s, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uabd z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uabd z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uabd z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uabd z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uabdlb z0.h, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uabdlb z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uabdlb z31.d, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uabdlt z0.h, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uabdlt z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uabdlt z31.d, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uadalp z0.h, p0/m, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uadalp z29.s, p0/m, z30.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uadalp z30.d, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uaddlb z0.h, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uaddlb z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uaddlb z31.d, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uaddlt z0.h, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uaddlt z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uaddlt z31.d, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 2.50 0.50 2.50 uaddv d0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uaddv d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 1.50 0.50 1.50 uaddv d0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 1.50 0.50 1.50 uaddv d0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uaddwb z0.h, z1.h, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uaddwb z29.s, z30.s, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uaddwb z31.d, z31.d, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uaddwt z0.h, z1.h, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uaddwt z29.s, z30.s, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uaddwt z31.d, z31.d, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - ucvtf z0.d, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - ucvtf z0.d, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - ucvtf z0.h, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - 2.00 - ucvtf z0.h, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - ucvtf z0.h, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - ucvtf z0.s, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - ucvtf z0.s, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 20.00 - - - udiv z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 11.00 - - - udiv z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 20.00 - - - udivr z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 11.00 - - - udivr z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - udot z0.d, z1.h, z15.h[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - udot z0.d, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 udot z0.s, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 udot z0.s, z1.b, z7.b[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uhadd z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uhadd z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uhadd z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uhadd z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uhsub z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uhsub z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uhsub z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uhsub z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uhsubr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uhsubr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uhsubr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uhsubr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 umax z0.b, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 umax z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 umax z31.b, z31.b, #255
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 umax z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 umax z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 umax z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 umaxp z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 umaxp z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 umaxp z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 umaxp z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 2.50 0.50 2.50 umaxv b0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 umaxv d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 1.50 0.50 1.50 umaxv h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 1.50 0.50 1.50 umaxv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 umin z0.b, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 umin z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 umin z31.b, z31.b, #255
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 umin z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 umin z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 umin z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uminp z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uminp z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uminp z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uminp z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 2.50 0.50 2.50 uminv b0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 0.50 0.50 uminv d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 1.50 0.50 1.50 uminv h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 1.50 0.50 1.50 uminv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umlalb z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umlalb z0.d, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umlalb z0.h, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umlalb z0.s, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umlalb z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umlalt z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umlalt z0.d, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umlalt z0.h, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umlalt z0.s, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umlalt z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umlslb z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umlslb z0.d, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umlslb z0.h, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umlslb z0.s, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umlslb z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umlslt z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umlslt z0.d, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umlslt z0.h, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umlslt z0.s, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umlslt z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ummla z0.s, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umulh z0.b, p7/m, z0.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umulh z0.b, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - umulh z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umulh z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umulh z0.h, z1.h, z2.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umulh z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umulh z29.s, z30.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - umulh z31.d, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umullb z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umullb z0.h, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umullb z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umullb z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umullb z31.d, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umullt z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umullt z0.h, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umullt z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umullt z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 - umullt z31.d, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqadd z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqadd z0.b, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqadd z0.b, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqadd z0.d, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqadd z0.d, z0.d, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqadd z0.d, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqadd z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqadd z0.h, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqadd z0.h, z0.h, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqadd z0.h, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqadd z0.s, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqadd z0.s, z0.s, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqadd z0.s, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqadd z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqadd z31.b, z31.b, #255
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqadd z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqadd z31.d, z31.d, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqadd z31.h, z31.h, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqadd z31.s, z31.s, #65280
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdecb w0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdecb w0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdecb w0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdecb w0, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdecb x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdecb x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdecb x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdecb x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdecb x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdecd w0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdecd w0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdecd w0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdecd w0, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdecd x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdecd x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdecd x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdecd x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdecd x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqdecd z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqdecd z0.d, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqdecd z0.d, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqdecd z0.d, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdech w0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdech w0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdech w0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdech w0, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdech x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdech x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdech x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdech x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdech x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqdech z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqdech z0.h, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqdech z0.h, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqdech z0.h, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdecp wzr, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdecp wzr, p15.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdecp wzr, p15.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdecp wzr, p15.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdecp x0, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdecp x0, p0.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdecp x0, p0.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdecp x0, p0.s
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - 0.25 0.25 0.25 0.25 uqdecp z0.d, p0.d
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - 0.25 0.25 0.25 0.25 uqdecp z0.h, p0.h
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - 0.25 0.25 0.25 0.25 uqdecp z0.s, p0.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdecw w0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdecw w0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdecw w0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdecw w0, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdecw x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdecw x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdecw x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdecw x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqdecw x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqdecw z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqdecw z0.s, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqdecw z0.s, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqdecw z0.s, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqincb w0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqincb w0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqincb w0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqincb w0, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqincb x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqincb x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqincb x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqincb x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqincb x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqincd w0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqincd w0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqincd w0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqincd w0, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqincd x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqincd x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqincd x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqincd x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqincd x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqincd z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqincd z0.d, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqincd z0.d, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqincd z0.d, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqinch w0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqinch w0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqinch w0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqinch w0, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqinch x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqinch x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqinch x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqinch x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqinch x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqinch z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqinch z0.h, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqinch z0.h, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqinch z0.h, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqincp wzr, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqincp wzr, p15.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqincp wzr, p15.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqincp wzr, p15.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqincp x0, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqincp x0, p0.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqincp x0, p0.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqincp x0, p0.s
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - 0.25 0.25 0.25 0.25 uqincp z0.d, p0.d
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - 0.25 0.25 0.25 0.25 uqincp z0.h, p0.h
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - 0.25 0.25 0.25 0.25 uqincp z0.s, p0.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqincw w0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqincw w0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqincw w0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqincw w0, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqincw x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqincw x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqincw x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqincw x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uqincw x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqincw z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqincw z0.s, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqincw z0.s, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqincw z0.s, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqrshl z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqrshl z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqrshl z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqrshl z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqrshlr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqrshlr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqrshlr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqrshlr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqrshrnb z0.b, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqrshrnb z0.h, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqrshrnb z0.s, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqrshrnb z31.b, z31.h, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqrshrnb z31.h, z31.s, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqrshrnb z31.s, z31.d, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqrshrnt z0.b, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqrshrnt z0.h, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqrshrnt z0.s, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqrshrnt z31.b, z31.h, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqrshrnt z31.h, z31.s, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqrshrnt z31.s, z31.d, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshl z0.b, p0/m, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshl z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshl z0.d, p0/m, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshl z0.h, p0/m, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshl z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshl z0.s, p0/m, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshl z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshl z31.b, p0/m, z31.b, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshl z31.d, p0/m, z31.d, #63
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshl z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshl z31.h, p0/m, z31.h, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshl z31.s, p0/m, z31.s, #31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshlr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshlr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshlr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshlr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshrnb z0.b, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshrnb z0.h, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshrnb z0.s, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshrnb z31.b, z31.h, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshrnb z31.h, z31.s, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshrnb z31.s, z31.d, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshrnt z0.b, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshrnt z0.h, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshrnt z0.s, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshrnt z31.b, z31.h, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshrnt z31.h, z31.s, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqshrnt z31.s, z31.d, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqsub z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqsub z0.b, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqsub z0.b, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqsub z0.d, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqsub z0.d, z0.d, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqsub z0.d, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqsub z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqsub z0.h, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqsub z0.h, z0.h, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqsub z0.h, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqsub z0.s, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqsub z0.s, z0.s, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqsub z0.s, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqsub z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqsub z31.b, z31.b, #255
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqsub z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqsub z31.d, z31.d, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqsub z31.h, z31.h, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqsub z31.s, z31.s, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqsubr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqsubr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqsubr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqsubr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqxtnb z0.b, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqxtnb z0.h, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqxtnb z0.s, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqxtnt z0.b, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqxtnt z0.h, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uqxtnt z0.s, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - urecpe z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 urhadd z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 urhadd z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 urhadd z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 urhadd z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 urshl z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 urshl z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 urshl z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 urshl z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 urshlr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 urshlr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 urshlr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 urshlr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 urshr z0.b, p0/m, z0.b, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 urshr z0.d, p0/m, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 urshr z0.h, p0/m, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 urshr z0.s, p0/m, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 urshr z31.b, p0/m, z31.b, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 urshr z31.d, p0/m, z31.d, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 urshr z31.h, p0/m, z31.h, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 urshr z31.s, p0/m, z31.s, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - 1.00 - ursqrte z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 ursra z0.b, z0.b, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 ursra z0.d, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 ursra z0.h, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 ursra z0.s, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 ursra z31.b, z31.b, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 ursra z31.d, z31.d, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 ursra z31.h, z31.h, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 ursra z31.s, z31.s, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ushllb z0.d, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ushllb z0.h, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ushllb z0.s, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ushllb z31.d, z31.s, #31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ushllb z31.h, z31.b, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ushllb z31.s, z31.h, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ushllt z0.d, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ushllt z0.h, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ushllt z0.s, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ushllt z31.d, z31.s, #31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ushllt z31.h, z31.b, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ushllt z31.s, z31.h, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usmmla z0.s, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usqadd z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usqadd z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usqadd z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usqadd z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 usra z0.b, z0.b, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 usra z0.d, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 usra z0.h, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 usra z0.s, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 usra z31.b, z31.b, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 usra z31.d, z31.d, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 usra z31.h, z31.h, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 0.50 - 0.50 usra z31.s, z31.s, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usublb z0.h, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usublb z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usublb z31.d, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usublt z0.h, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usublt z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usublt z31.d, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usubwb z0.h, z1.h, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usubwb z29.s, z30.s, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usubwb z31.d, z31.d, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usubwt z0.h, z1.h, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usubwt z29.s, z30.s, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 usubwt z31.d, z31.d, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uunpkhi z31.d, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uunpkhi z31.h, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uunpkhi z31.s, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uunpklo z31.d, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uunpklo z31.h, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uunpklo z31.s, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uxtb z0.d, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uxtb z0.h, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uxtb z0.s, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uxtb z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uxtb z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uxtb z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uxth z0.d, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uxth z0.s, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uxth z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uxth z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uxtw z0.d, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uxtw z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uzp1 p15.b, p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uzp1 p15.d, p15.d, p15.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uzp1 p15.h, p15.h, p15.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uzp1 p15.s, p15.s, p15.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uzp1 z31.b, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uzp1 z31.d, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uzp1 z31.h, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uzp1 z31.s, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uzp2 p15.b, p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uzp2 p15.d, p15.d, p15.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uzp2 p15.h, p15.h, p15.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - uzp2 p15.s, p15.s, p15.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uzp2 z31.b, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uzp2 z31.d, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uzp2 z31.h, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 uzp2 z31.s, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - whilege p15.b, w0, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - whilege p15.b, wzr, w0
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - whilege p15.b, x0, xzr
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - whilege p15.b, xzr, x0
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - whilege p15.d, w0, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - whilege p15.d, x0, xzr
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - whilege p15.h, w0, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - whilege p15.h, x0, xzr
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - whilege p15.s, w0, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - whilege p15.s, x0, xzr
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - whilerw p15.b, x30, x30
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - whilerw p15.d, x30, x30
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - whilerw p15.h, x30, x30
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - whilerw p15.s, x30, x30
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - whilewr p15.b, x30, x30
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - whilewr p15.d, x30, x30
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - whilewr p15.h, x30, x30
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - - - whilewr p15.s, x30, x30
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - wrffr p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - - - wrffr p15.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 xar z0.b, z0.b, z1.b, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 xar z0.d, z0.d, z1.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 xar z0.h, z0.h, z1.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 xar z0.s, z0.s, z1.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 xar z31.b, z31.b, z30.b, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 xar z31.d, z31.d, z30.d, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 xar z31.h, z31.h, z30.h, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 xar z31.s, z31.s, z30.s, #32
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - zip1 p0.b, p0.b, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - zip1 p0.d, p0.d, p0.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - zip1 p0.h, p0.h, p0.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - zip1 p0.s, p0.s, p0.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - zip1 p15.b, p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - zip1 p15.d, p15.d, p15.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - zip1 p15.h, p15.h, p15.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - zip1 p15.s, p15.s, p15.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 zip1 z0.b, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 zip1 z0.d, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 zip1 z0.h, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 zip1 z0.s, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 zip1 z31.b, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 zip1 z31.d, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 zip1 z31.h, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 zip1 z31.s, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - zip2 p0.b, p0.b, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - zip2 p0.d, p0.d, p0.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - zip2 p0.h, p0.h, p0.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - zip2 p0.s, p0.s, p0.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - zip2 p15.b, p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - zip2 p15.d, p15.d, p15.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - zip2 p15.h, p15.h, p15.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - - - zip2 p15.s, p15.s, p15.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 zip2 z0.b, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 zip2 z0.d, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 zip2 z0.h, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 zip2 z0.s, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 zip2 z31.b, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 zip2 z31.d, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 zip2 z31.h, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 zip2 z31.s, z31.s, z31.s
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-writeback.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-writeback.s
new file mode 100644
index 0000000..ca3c8da
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-writeback.s
@@ -0,0 +1,3979 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=neoverse-v3 --instruction-info=0 --resource-pressure=0 --timeline --timeline-max-iterations=1 < %s | FileCheck %s
+
+# LLVM-MCA-BEGIN G01
+ld1 { v1.1d }, [x27], #8
+ld1 { v1.2d }, [x27], #16
+ld1 { v1.2s }, [x27], #8
+ld1 { v1.4h }, [x27], #8
+ld1 { v1.4s }, [x27], #16
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G02
+ld1 { v1.8b }, [x27], #8
+ld1 { v1.8h }, [x27], #16
+ld1 { v1.16b }, [x27], #16
+ld1 { v1.1d }, [x27], x28
+ld1 { v1.2d }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G03
+ld1 { v1.2s }, [x27], x28
+ld1 { v1.4h }, [x27], x28
+ld1 { v1.4s }, [x27], x28
+ld1 { v1.8b }, [x27], x28
+ld1 { v1.8h }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G04
+ld1 { v1.16b }, [x27], x28
+ld1 { v1.1d, v2.1d }, [x27], #16
+ld1 { v1.2d, v2.2d }, [x27], #32
+ld1 { v1.2s, v2.2s }, [x27], #16
+ld1 { v1.4h, v2.4h }, [x27], #16
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G05
+ld1 { v1.4s, v2.4s }, [x27], #32
+ld1 { v1.8b, v2.8b }, [x27], #16
+ld1 { v1.8h, v2.8h }, [x27], #32
+ld1 { v1.16b, v2.16b }, [x27], #32
+ld1 { v1.1d, v2.1d }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G06
+ld1 { v1.2d, v2.2d }, [x27], x28
+ld1 { v1.2s, v2.2s }, [x27], x28
+ld1 { v1.4h, v2.4h }, [x27], x28
+ld1 { v1.4s, v2.4s }, [x27], x28
+ld1 { v1.8b, v2.8b }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G07
+ld1 { v1.8h, v2.8h }, [x27], x28
+ld1 { v1.16b, v2.16b }, [x27], x28
+ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G08
+ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G09
+ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G10
+ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G11
+ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G12
+ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G13
+ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+ld1 { v1.b }[0], [x27], #1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G14
+ld1 { v1.b }[8], [x27], #1
+ld1 { v1.b }[0], [x27], x28
+ld1 { v1.b }[8], [x27], x28
+ld1 { v1.h }[0], [x27], #2
+ld1 { v1.h }[4], [x27], #2
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G15
+ld1 { v1.h }[0], [x27], x28
+ld1 { v1.h }[4], [x27], x28
+ld1 { v1.s }[0], [x27], #4
+ld1 { v1.s }[0], [x27], x28
+ld1 { v1.d }[0], [x27], #8
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G16
+ld1 { v1.d }[0], [x27], x28
+ld1r { v1.1d }, [x27], #8
+ld1r { v1.2d }, [x27], #8
+ld1r { v1.2s }, [x27], #4
+ld1r { v1.4h }, [x27], #2
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G17
+ld1r { v1.4s }, [x27], #4
+ld1r { v1.8b }, [x27], #1
+ld1r { v1.8h }, [x27], #2
+ld1r { v1.16b }, [x27], #1
+ld1r { v1.1d }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G18
+ld1r { v1.2d }, [x27], x28
+ld1r { v1.2s }, [x27], x28
+ld1r { v1.4h }, [x27], x28
+ld1r { v1.4s }, [x27], x28
+ld1r { v1.8b }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G19
+ld1r { v1.8h }, [x27], x28
+ld1r { v1.16b }, [x27], x28
+ld2 { v1.2d, v2.2d }, [x27], #32
+ld2 { v1.2s, v2.2s }, [x27], #16
+ld2 { v1.4h, v2.4h }, [x27], #16
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G20
+ld2 { v1.4s, v2.4s }, [x27], #32
+ld2 { v1.8b, v2.8b }, [x27], #16
+ld2 { v1.8h, v2.8h }, [x27], #32
+ld2 { v1.16b, v2.16b }, [x27], #32
+ld2 { v1.2d, v2.2d }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G21
+ld2 { v1.2s, v2.2s }, [x27], x28
+ld2 { v1.4h, v2.4h }, [x27], x28
+ld2 { v1.4s, v2.4s }, [x27], x28
+ld2 { v1.8b, v2.8b }, [x27], x28
+ld2 { v1.8h, v2.8h }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G22
+ld2 { v1.16b, v2.16b }, [x27], x28
+ld2 { v1.b, v2.b }[0], [x27], #2
+ld2 { v1.b, v2.b }[8], [x27], #2
+ld2 { v1.b, v2.b }[0], [x27], x28
+ld2 { v1.b, v2.b }[8], [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G23
+ld2 { v1.h, v2.h }[0], [x27], #4
+ld2 { v1.h, v2.h }[4], [x27], #4
+ld2 { v1.h, v2.h }[0], [x27], x28
+ld2 { v1.h, v2.h }[4], [x27], x28
+ld2 { v1.s, v2.s }[0], [x27], #8
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G24
+ld2 { v1.s, v2.s }[0], [x27], x28
+ld2 { v1.d, v2.d }[0], [x27], #16
+ld2 { v1.d, v2.d }[0], [x27], x28
+ld2r { v1.1d, v2.1d }, [x27], #16
+ld2r { v1.2d, v2.2d }, [x27], #16
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G25
+ld2r { v1.2s, v2.2s }, [x27], #8
+ld2r { v1.4h, v2.4h }, [x27], #4
+ld2r { v1.4s, v2.4s }, [x27], #8
+ld2r { v1.8b, v2.8b }, [x27], #2
+ld2r { v1.8h, v2.8h }, [x27], #4
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G26
+ld2r { v1.16b, v2.16b }, [x27], #2
+ld2r { v1.1d, v2.1d }, [x27], x28
+ld2r { v1.2d, v2.2d }, [x27], x28
+ld2r { v1.2s, v2.2s }, [x27], x28
+ld2r { v1.4h, v2.4h }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G27
+ld2r { v1.4s, v2.4s }, [x27], x28
+ld2r { v1.8b, v2.8b }, [x27], x28
+ld2r { v1.8h, v2.8h }, [x27], x28
+ld2r { v1.16b, v2.16b }, [x27], x28
+ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G28
+ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G29
+ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G30
+ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+ld3 { v1.b, v2.b, v3.b }[0], [x27], #3
+ld3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G31
+ld3 { v1.b, v2.b, v3.b }[0], [x27], x28
+ld3 { v1.b, v2.b, v3.b }[8], [x27], x28
+ld3 { v1.h, v2.h, v3.h }[0], [x27], #6
+ld3 { v1.h, v2.h, v3.h }[4], [x27], #6
+ld3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G32
+ld3 { v1.h, v2.h, v3.h }[4], [x27], x28
+ld3 { v1.s, v2.s, v3.s }[0], [x27], #12
+ld3 { v1.s, v2.s, v3.s }[0], [x27], x28
+ld3 { v1.d, v2.d, v3.d }[0], [x27], #24
+ld3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G33
+ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24
+ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24
+ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12
+ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6
+ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G34
+ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3
+ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6
+ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3
+ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28
+ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G35
+ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28
+ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28
+ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28
+ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28
+ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G36
+ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28
+ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G37
+ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G38
+ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G39
+ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G40
+ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G41
+ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
+ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G42
+ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
+ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
+ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
+ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
+ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G43
+ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G44
+ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+ldp s1, s2, [x27], #248
+ldp d1, d2, [x27], #496
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G45
+ldp q1, q2, [x27], #992
+ldp s1, s2, [x27, #248]!
+ldp d1, d2, [x27, #496]!
+ldp q1, q2, [x27, #992]!
+ldp w1, w2, [x27], #248
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G46
+ldp x1, x2, [x27], #496
+ldp w1, w2, [x27, #248]!
+ldp x1, x2, [x27, #496]!
+ldpsw x1, x2, [x27], #248
+ldpsw x1, x2, [x27, #248]!
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G47
+ldr b1, [x27], #254
+ldr h1, [x27], #254
+ldr s1, [x27], #254
+ldr d1, [x27], #254
+ldr q1, [x27], #254
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G48
+ldr b1, [x27, #254]!
+ldr h1, [x27, #254]!
+ldr s1, [x27, #254]!
+ldr d1, [x27, #254]!
+ldr q1, [x27, #254]!
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G49
+ldr w1, [x27], #254
+ldr x1, [x27], #254
+ldr w1, [x27, #254]!
+ldr x1, [x27, #254]!
+ldrb w1, [x27], #254
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G50
+ldrb w1, [x27, #254]!
+ldrh w1, [x27], #254
+ldrh w1, [x27, #254]!
+ldrsb w1, [x27], #254
+ldrsb x1, [x27], #254
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G51
+ldrsb w1, [x27, #254]!
+ldrsb x1, [x27, #254]!
+ldrsh w1, [x27], #254
+ldrsh x1, [x27], #254
+ldrsh w1, [x27, #254]!
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G52
+ldrsh x1, [x27, #254]!
+ldrsw x1, [x27], #254
+ldrsw x1, [x27, #254]!
+st1 { v1.1d }, [x27], #8
+st1 { v1.2d }, [x27], #16
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G53
+st1 { v1.2s }, [x27], #8
+st1 { v1.4h }, [x27], #8
+st1 { v1.4s }, [x27], #16
+st1 { v1.8b }, [x27], #8
+st1 { v1.8h }, [x27], #16
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G54
+st1 { v1.16b }, [x27], #16
+st1 { v1.1d }, [x27], x28
+st1 { v1.2d }, [x27], x28
+st1 { v1.2s }, [x27], x28
+st1 { v1.4h }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G55
+st1 { v1.4s }, [x27], x28
+st1 { v1.8b }, [x27], x28
+st1 { v1.8h }, [x27], x28
+st1 { v1.16b }, [x27], x28
+st1 { v1.1d, v2.1d }, [x27], #16
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G56
+st1 { v1.2d, v2.2d }, [x27], #32
+st1 { v1.2s, v2.2s }, [x27], #16
+st1 { v1.4h, v2.4h }, [x27], #16
+st1 { v1.4s, v2.4s }, [x27], #32
+st1 { v1.8b, v2.8b }, [x27], #16
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G57
+st1 { v1.8h, v2.8h }, [x27], #32
+st1 { v1.16b, v2.16b }, [x27], #32
+st1 { v1.1d, v2.1d }, [x27], x28
+st1 { v1.2d, v2.2d }, [x27], x28
+st1 { v1.2s, v2.2s }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G58
+st1 { v1.4h, v2.4h }, [x27], x28
+st1 { v1.4s, v2.4s }, [x27], x28
+st1 { v1.8b, v2.8b }, [x27], x28
+st1 { v1.8h, v2.8h }, [x27], x28
+st1 { v1.16b, v2.16b }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G59
+st1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+st1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+st1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+st1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+st1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G60
+st1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+st1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+st1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+st1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+st1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G61
+st1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+st1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+st1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+st1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+st1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G62
+st1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G63
+st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G64
+st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G65
+st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+st1 { v1.b }[0], [x27], #1
+st1 { v1.b }[8], [x27], #1
+st1 { v1.b }[0], [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G66
+st1 { v1.b }[8], [x27], x28
+st1 { v1.h }[0], [x27], #2
+st1 { v1.h }[4], [x27], #2
+st1 { v1.h }[0], [x27], x28
+st1 { v1.h }[4], [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G67
+st1 { v1.s }[0], [x27], #4
+st1 { v1.s }[0], [x27], x28
+st1 { v1.d }[0], [x27], #8
+st1 { v1.d }[0], [x27], x28
+st2 { v1.2d, v2.2d }, [x27], #32
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G68
+st2 { v1.2s, v2.2s }, [x27], #16
+st2 { v1.4h, v2.4h }, [x27], #16
+st2 { v1.4s, v2.4s }, [x27], #32
+st2 { v1.8b, v2.8b }, [x27], #16
+st2 { v1.8h, v2.8h }, [x27], #32
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G69
+st2 { v1.16b, v2.16b }, [x27], #32
+st2 { v1.2d, v2.2d }, [x27], x28
+st2 { v1.2s, v2.2s }, [x27], x28
+st2 { v1.4h, v2.4h }, [x27], x28
+st2 { v1.4s, v2.4s }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G70
+st2 { v1.8b, v2.8b }, [x27], x28
+st2 { v1.8h, v2.8h }, [x27], x28
+st2 { v1.16b, v2.16b }, [x27], x28
+st2 { v1.b, v2.b }[0], [x27], #2
+st2 { v1.b, v2.b }[8], [x27], #2
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G71
+st2 { v1.b, v2.b }[0], [x27], x28
+st2 { v1.b, v2.b }[8], [x27], x28
+st2 { v1.h, v2.h }[0], [x27], #4
+st2 { v1.h, v2.h }[4], [x27], #4
+st2 { v1.h, v2.h }[0], [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G72
+st2 { v1.h, v2.h }[4], [x27], x28
+st2 { v1.s, v2.s }[0], [x27], #8
+st2 { v1.s, v2.s }[0], [x27], x28
+st2 { v1.d, v2.d }[0], [x27], #16
+st2 { v1.d, v2.d }[0], [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G73
+st2g x26, [x27], #4064
+st2g x26, [x27, #4064]!
+st3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+st3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+st3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G74
+st3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+st3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+st3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+st3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+st3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G75
+st3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+st3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+st3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+st3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+st3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G76
+st3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+st3 { v1.b, v2.b, v3.b }[0], [x27], #3
+st3 { v1.b, v2.b, v3.b }[8], [x27], #3
+st3 { v1.b, v2.b, v3.b }[0], [x27], x28
+st3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G77
+st3 { v1.h, v2.h, v3.h }[0], [x27], #6
+st3 { v1.h, v2.h, v3.h }[4], [x27], #6
+st3 { v1.h, v2.h, v3.h }[0], [x27], x28
+st3 { v1.h, v2.h, v3.h }[4], [x27], x28
+st3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G78
+st3 { v1.s, v2.s, v3.s }[0], [x27], x28
+st3 { v1.d, v2.d, v3.d }[0], [x27], #24
+st3 { v1.d, v2.d, v3.d }[0], [x27], x28
+st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G79
+st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G80
+st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G81
+st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G82
+st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G83
+st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+stg x26, [x27], #4064
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G84
+stg x26, [x27, #4064]!
+stgp x1, x2, [x27], #992
+stgp x1, x2, [x27, #992]!
+stp s1, s2, [x27], #248
+stp d1, d2, [x27], #496
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G85
+stp q1, q2, [x27], #992
+stp s1, s2, [x27, #248]!
+stp d1, d2, [x27, #496]!
+stp q1, q2, [x27, #992]!
+stp w1, w2, [x27], #248
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G86
+stp x1, x2, [x27], #496
+stp w1, w2, [x27, #248]!
+stp x1, x2, [x27, #496]!
+str b1, [x27], #254
+str h1, [x27], #254
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G87
+str s1, [x27], #254
+str d1, [x27], #254
+str q1, [x27], #254
+str b1, [x27, #254]!
+str h1, [x27, #254]!
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G88
+str s1, [x27, #254]!
+str d1, [x27, #254]!
+str q1, [x27, #254]!
+str w1, [x27], #254
+str x1, [x27], #254
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G89
+str w1, [x27, #254]!
+str x1, [x27, #254]!
+strb w1, [x27], #254
+strb w1, [x27, #254]!
+strh w1, [x27], #254
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G90
+strh w1, [x27, #254]!
+stz2g x26, [x27], #4064
+stz2g x26, [x27, #4064]!
+stzg x26, [x27], #4064
+stzg x26, [x27, #4064]!
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G91
+ldr x1, [x27], #254
+ldr x2, [x1], #254
+# LLVM-MCA-END
+
+# CHECK: [0] Code Region - G01
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 508
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.97
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 1.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . ld1 { v1.1d }, [x27], #8
+# CHECK-NEXT: [0,1] D=eeeeeeER. . ld1 { v1.2d }, [x27], #16
+# CHECK-NEXT: [0,2] D==eeeeeeER . ld1 { v1.2s }, [x27], #8
+# CHECK-NEXT: [0,3] D===eeeeeeER. ld1 { v1.4h }, [x27], #8
+# CHECK-NEXT: [0,4] D====eeeeeeER ld1 { v1.4s }, [x27], #16
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.1d }, [x27], #8
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.2d }, [x27], #16
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1 { v1.2s }, [x27], #8
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ld1 { v1.4h }, [x27], #8
+# CHECK-NEXT: 4. 1 5.0 0.0 0.0 ld1 { v1.4s }, [x27], #16
+# CHECK-NEXT: 1 3.0 0.2 0.0 <total>
+
+# CHECK: [1] Code Region - G02
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 508
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.97
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 1.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . ld1 { v1.8b }, [x27], #8
+# CHECK-NEXT: [0,1] D=eeeeeeER. . ld1 { v1.8h }, [x27], #16
+# CHECK-NEXT: [0,2] D==eeeeeeER . ld1 { v1.16b }, [x27], #16
+# CHECK-NEXT: [0,3] D===eeeeeeER. ld1 { v1.1d }, [x27], x28
+# CHECK-NEXT: [0,4] D====eeeeeeER ld1 { v1.2d }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8b }, [x27], #8
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.8h }, [x27], #16
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1 { v1.16b }, [x27], #16
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ld1 { v1.1d }, [x27], x28
+# CHECK-NEXT: 4. 1 5.0 0.0 0.0 ld1 { v1.2d }, [x27], x28
+# CHECK-NEXT: 1 3.0 0.2 0.0 <total>
+
+# CHECK: [2] Code Region - G03
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 508
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.97
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 1.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . ld1 { v1.2s }, [x27], x28
+# CHECK-NEXT: [0,1] D=eeeeeeER. . ld1 { v1.4h }, [x27], x28
+# CHECK-NEXT: [0,2] D==eeeeeeER . ld1 { v1.4s }, [x27], x28
+# CHECK-NEXT: [0,3] D===eeeeeeER. ld1 { v1.8b }, [x27], x28
+# CHECK-NEXT: [0,4] D====eeeeeeER ld1 { v1.8h }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.4h }, [x27], x28
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1 { v1.4s }, [x27], x28
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ld1 { v1.8b }, [x27], x28
+# CHECK-NEXT: 4. 1 5.0 0.0 0.0 ld1 { v1.8h }, [x27], x28
+# CHECK-NEXT: 1 3.0 0.2 0.0 <total>
+
+# CHECK: [3] Code Region - G04
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 508
+# CHECK-NEXT: Total uOps: 1400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.76
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 3.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . ld1 { v1.16b }, [x27], x28
+# CHECK-NEXT: [0,1] D=eeeeeeER. . ld1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: [0,2] D==eeeeeeER . ld1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,3] .D==eeeeeeER. ld1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,4] .D===eeeeeeER ld1 { v1.4h, v2.4h }, [x27], #16
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: 1 2.6 0.2 0.0 <total>
+
+# CHECK: [4] Code Region - G05
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 508
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.95
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 3.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . ld1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,1] D=eeeeeeER. . ld1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,2] D==eeeeeeER . ld1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,3] .D==eeeeeeER. ld1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,4] .D===eeeeeeER ld1 { v1.1d, v2.1d }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: 1 2.6 0.2 0.0 <total>
+
+# CHECK: [5] Code Region - G06
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 508
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.95
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 3.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . ld1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,1] D=eeeeeeER. . ld1 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,2] D==eeeeeeER . ld1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,3] .D==eeeeeeER. ld1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,4] .D===eeeeeeER ld1 { v1.8b, v2.8b }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 1 2.6 0.2 0.0 <total>
+
+# CHECK: [6] Code Region - G07
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 508
+# CHECK-NEXT: Total uOps: 1800
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.54
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 4.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . ld1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,1] D=eeeeeeER. . ld1 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,2] D==eeeeeeER . ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: [0,3] .D==eeeeeeER. ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,4] .D===eeeeeeER ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: 1 2.6 0.2 0.0 <total>
+
+# CHECK: [7] Code Region - G08
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 508
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.94
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,1] D=eeeeeeER. . ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,2] .D=eeeeeeER . ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,3] .D==eeeeeeER. ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,4] . D==eeeeeeER ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 1 2.2 0.2 0.0 <total>
+
+# CHECK: [8] Code Region - G09
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 508
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.94
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: [0,1] D=eeeeeeER. . ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,2] .D=eeeeeeER . ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,3] .D==eeeeeeER. ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,4] . D==eeeeeeER ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 1 2.2 0.2 0.0 <total>
+
+# CHECK: [9] Code Region - G10
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 608
+# CHECK-NEXT: Total uOps: 2200
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.62
+# CHECK-NEXT: IPC: 0.82
+# CHECK-NEXT: Block RThroughput: 5.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,1] D=eeeeeeER. . ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,2] .D=eeeeeeER . ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,3] .D==eeeeeeeER. ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: [0,4] . D==eeeeeeeER ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 1 2.2 0.2 0.0 <total>
+
+# CHECK: [10] Code Region - G11
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 675
+# CHECK-NEXT: Total uOps: 2500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.70
+# CHECK-NEXT: IPC: 0.74
+# CHECK-NEXT: Block RThroughput: 6.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,1] D=eeeeeeeER . ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,2] .D=eeeeeeeER . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,3] .D===eeeeeeeER. ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,4] . D===eeeeeeeER ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: 3. 1 4.0 1.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 1 2.6 0.4 0.0 <total>
+
+# CHECK: [11] Code Region - G12
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 675
+# CHECK-NEXT: Total uOps: 2500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.70
+# CHECK-NEXT: IPC: 0.74
+# CHECK-NEXT: Block RThroughput: 6.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,1] D=eeeeeeeER . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: [0,2] .D=eeeeeeeER . ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,3] .D===eeeeeeeER. ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,4] . D===eeeeeeeER ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 3. 1 4.0 1.0 0.0 ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 1 2.6 0.4 0.0 <total>
+
+# CHECK: [12] Code Region - G13
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 1210
+# CHECK-NEXT: Total uOps: 2300
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.90
+# CHECK-NEXT: IPC: 0.41
+# CHECK-NEXT: Block RThroughput: 5.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01
+
+# CHECK: [0,0] DeeeeeeeER. . .. ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,1] D=eeeeeeeER . .. ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,2] .D=eeeeeeeER . .. ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,3] .D===eeeeeeeER . .. ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,4] . D=========eeeeeeeeER ld1 { v1.b }[0], [x27], #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 3. 1 4.0 1.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 4. 1 10.0 0.0 0.0 ld1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: 1 3.8 0.4 0.0 <total>
+
+# CHECK: [13] Code Region - G14
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 4003
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.37
+# CHECK-NEXT: IPC: 0.12
+# CHECK-NEXT: Block RThroughput: 1.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: [0,1] D========eeeeeeeeER . . . . . . ld1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: [0,2] D================eeeeeeeeER . . . . ld1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: [0,3] .D=======================eeeeeeeeER. . . ld1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: [0,4] .D===============================eeeeeeeeER ld1 { v1.h }[4], [x27], #2
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 ld1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: 2. 1 17.0 0.0 0.0 ld1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: 3. 1 24.0 0.0 0.0 ld1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: 4. 1 32.0 0.0 0.0 ld1 { v1.h }[4], [x27], #2
+# CHECK-NEXT: 1 16.6 0.2 0.0 <total>
+
+# CHECK: [14] Code Region - G15
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 4003
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.37
+# CHECK-NEXT: IPC: 0.12
+# CHECK-NEXT: Block RThroughput: 1.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: [0,1] D========eeeeeeeeER . . . . . . ld1 { v1.h }[4], [x27], x28
+# CHECK-NEXT: [0,2] D================eeeeeeeeER . . . . ld1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: [0,3] .D=======================eeeeeeeeER. . . ld1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: [0,4] .D===============================eeeeeeeeER ld1 { v1.d }[0], [x27], #8
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 ld1 { v1.h }[4], [x27], x28
+# CHECK-NEXT: 2. 1 17.0 0.0 0.0 ld1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: 3. 1 24.0 0.0 0.0 ld1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: 4. 1 32.0 0.0 0.0 ld1 { v1.d }[0], [x27], #8
+# CHECK-NEXT: 1 16.6 0.2 0.0 <total>
+
+# CHECK: [15] Code Region - G16
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 1203
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.25
+# CHECK-NEXT: IPC: 0.42
+# CHECK-NEXT: Block RThroughput: 1.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . ld1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld1r { v1.1d }, [x27], #8
+# CHECK-NEXT: [0,2] D==eeeeeeeeER . ld1r { v1.2d }, [x27], #8
+# CHECK-NEXT: [0,3] .D==eeeeeeeeER. ld1r { v1.2s }, [x27], #4
+# CHECK-NEXT: [0,4] .D===eeeeeeeeER ld1r { v1.4h }, [x27], #2
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1r { v1.1d }, [x27], #8
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1r { v1.2d }, [x27], #8
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld1r { v1.2s }, [x27], #4
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ld1r { v1.4h }, [x27], #2
+# CHECK-NEXT: 1 2.6 0.2 0.0 <total>
+
+# CHECK: [16] Code Region - G17
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 510
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.94
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 1.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . ld1r { v1.4s }, [x27], #4
+# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld1r { v1.8b }, [x27], #1
+# CHECK-NEXT: [0,2] D==eeeeeeeeER . ld1r { v1.8h }, [x27], #2
+# CHECK-NEXT: [0,3] .D==eeeeeeeeER. ld1r { v1.16b }, [x27], #1
+# CHECK-NEXT: [0,4] .D===eeeeeeeeER ld1r { v1.1d }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1r { v1.4s }, [x27], #4
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1r { v1.8b }, [x27], #1
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1r { v1.8h }, [x27], #2
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld1r { v1.16b }, [x27], #1
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ld1r { v1.1d }, [x27], x28
+# CHECK-NEXT: 1 2.6 0.2 0.0 <total>
+
+# CHECK: [17] Code Region - G18
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 510
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.94
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 1.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . ld1r { v1.2d }, [x27], x28
+# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld1r { v1.2s }, [x27], x28
+# CHECK-NEXT: [0,2] D==eeeeeeeeER . ld1r { v1.4h }, [x27], x28
+# CHECK-NEXT: [0,3] .D==eeeeeeeeER. ld1r { v1.4s }, [x27], x28
+# CHECK-NEXT: [0,4] .D===eeeeeeeeER ld1r { v1.8b }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1r { v1.2d }, [x27], x28
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1r { v1.2s }, [x27], x28
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1r { v1.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld1r { v1.4s }, [x27], x28
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ld1r { v1.8b }, [x27], x28
+# CHECK-NEXT: 1 2.6 0.2 0.0 <total>
+
+# CHECK: [18] Code Region - G19
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 510
+# CHECK-NEXT: Total uOps: 1900
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.73
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . ld1r { v1.8h }, [x27], x28
+# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld1r { v1.16b }, [x27], x28
+# CHECK-NEXT: [0,2] .D=eeeeeeeeER . ld2 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,3] .D==eeeeeeeeER. ld2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,4] . D==eeeeeeeeER ld2 { v1.4h, v2.4h }, [x27], #16
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1r { v1.8h }, [x27], x28
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1r { v1.16b }, [x27], x28
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: 1 2.2 0.2 0.0 <total>
+
+# CHECK: [19] Code Region - G20
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 510
+# CHECK-NEXT: Total uOps: 2400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.71
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 3.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . ld2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,2] .D=eeeeeeeeER . ld2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,3] .D==eeeeeeeeER. ld2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,4] . D==eeeeeeeeER ld2 { v1.2d, v2.2d }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 1 2.2 0.2 0.0 <total>
+
+# CHECK: [20] Code Region - G21
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 510
+# CHECK-NEXT: Total uOps: 2200
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.31
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . ld2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,2] .D=eeeeeeeeER . ld2 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,3] .D==eeeeeeeeER. ld2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,4] . D==eeeeeeeeER ld2 { v1.8h, v2.8h }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld2 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 1 2.2 0.2 0.0 <total>
+
+# CHECK: [21] Code Region - G22
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 3310
+# CHECK-NEXT: Total uOps: 2100
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.63
+# CHECK-NEXT: IPC: 0.15
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,1] D========eeeeeeeeER . . . . . . ld2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: [0,2] .D===============eeeeeeeeER . . . . ld2 { v1.b, v2.b }[8], [x27], #2
+# CHECK-NEXT: [0,3] .D=======================eeeeeeeeER. . . ld2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: [0,4] . D==============================eeeeeeeeER ld2 { v1.b, v2.b }[8], [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: 2. 1 16.0 0.0 0.0 ld2 { v1.b, v2.b }[8], [x27], #2
+# CHECK-NEXT: 3. 1 24.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: 4. 1 31.0 0.0 0.0 ld2 { v1.b, v2.b }[8], [x27], x28
+# CHECK-NEXT: 1 16.2 0.2 0.0 <total>
+
+# CHECK: [22] Code Region - G23
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 4003
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.12
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: [0,1] D========eeeeeeeeER . . . . . . ld2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: [0,2] .D===============eeeeeeeeER . . . . ld2 { v1.h, v2.h }[0], [x27], x28
+# CHECK-NEXT: [0,3] .D=======================eeeeeeeeER. . . ld2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: [0,4] . D==============================eeeeeeeeER ld2 { v1.s, v2.s }[0], [x27], #8
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 ld2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: 2. 1 16.0 0.0 0.0 ld2 { v1.h, v2.h }[0], [x27], x28
+# CHECK-NEXT: 3. 1 24.0 0.0 0.0 ld2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: 4. 1 31.0 0.0 0.0 ld2 { v1.s, v2.s }[0], [x27], #8
+# CHECK-NEXT: 1 16.2 0.2 0.0 <total>
+
+# CHECK: [23] Code Region - G24
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 2603
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.77
+# CHECK-NEXT: IPC: 0.19
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeeeeeeeeER . . . . ld2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: [0,1] D========eeeeeeeeER . . . ld2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: [0,2] .D===============eeeeeeeeER . ld2 { v1.d, v2.d }[0], [x27], x28
+# CHECK-NEXT: [0,3] .D================eeeeeeeeER. ld2r { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: [0,4] . D================eeeeeeeeER ld2r { v1.2d, v2.2d }, [x27], #16
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: 2. 1 16.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], x28
+# CHECK-NEXT: 3. 1 17.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], #16
+# CHECK-NEXT: 1 12.0 0.2 0.0 <total>
+
+# CHECK: [24] Code Region - G25
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 510
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.92
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . ld2r { v1.2s, v2.2s }, [x27], #8
+# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld2r { v1.4h, v2.4h }, [x27], #4
+# CHECK-NEXT: [0,2] .D=eeeeeeeeER . ld2r { v1.4s, v2.4s }, [x27], #8
+# CHECK-NEXT: [0,3] .D==eeeeeeeeER. ld2r { v1.8b, v2.8b }, [x27], #2
+# CHECK-NEXT: [0,4] . D==eeeeeeeeER ld2r { v1.8h, v2.8h }, [x27], #4
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.2s, v2.2s }, [x27], #8
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], #4
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld2r { v1.4s, v2.4s }, [x27], #8
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], #2
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], #4
+# CHECK-NEXT: 1 2.2 0.2 0.0 <total>
+
+# CHECK: [25] Code Region - G26
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 510
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.92
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . ld2r { v1.16b, v2.16b }, [x27], #2
+# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld2r { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: [0,2] .D=eeeeeeeeER . ld2r { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,3] .D==eeeeeeeeER. ld2r { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,4] . D==eeeeeeeeER ld2r { v1.4h, v2.4h }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.16b, v2.16b }, [x27], #2
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld2r { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 1 2.2 0.2 0.0 <total>
+
+# CHECK: [26] Code Region - G27
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 510
+# CHECK-NEXT: Total uOps: 2300
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.51
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 2.8
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . ld2r { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld2r { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,2] .D=eeeeeeeeER . ld2r { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,3] .D==eeeeeeeeER. ld2r { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,4] . D==eeeeeeeeER ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld2r { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: 1 2.2 0.2 0.0 <total>
+
+# CHECK: [27] Code Region - G28
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 510
+# CHECK-NEXT: Total uOps: 3200
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 6.27
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,1] .DeeeeeeeeER . ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,2] . DeeeeeeeeER . ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,3] . DeeeeeeeeER. ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,4] . DeeeeeeeeER ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: 3. 1 1.0 0.0 0.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 1 1.0 0.2 0.0 <total>
+
+# CHECK: [28] Code Region - G29
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 510
+# CHECK-NEXT: Total uOps: 3300
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 6.47
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 4.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,1] .DeeeeeeeeER . ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,2] . DeeeeeeeeER . ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,3] . DeeeeeeeeER. ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,4] . DeeeeeeeeER ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 3. 1 1.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 1 1.0 0.2 0.0 <total>
+
+# CHECK: [29] Code Region - G30
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 1910
+# CHECK-NEXT: Total uOps: 3200
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.68
+# CHECK-NEXT: IPC: 0.26
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeeeeeeeeER . . . . ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,1] .DeeeeeeeeER . . . . ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,2] . DeeeeeeeeER . . . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,3] . D=======eeeeeeeeER . . ld3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: [0,4] . D==============eeeeeeeeER ld3 { v1.b, v2.b, v3.b }[8], [x27], #3
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 3. 1 8.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: 4. 1 15.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: 1 5.2 0.2 0.0 <total>
+
+# CHECK: [30] Code Region - G31
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 4003
+# CHECK-NEXT: Total uOps: 3000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.12
+# CHECK-NEXT: Block RThroughput: 3.8
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: [0,1] .D=======eeeeeeeeER . . . . . . ld3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: [0,2] . D==============eeeeeeeeER . . . . ld3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: [0,3] . D=====================eeeeeeeeER. . . ld3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: [0,4] . D============================eeeeeeeeER ld3 { v1.h, v2.h, v3.h }[0], [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: 2. 1 15.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: 3. 1 22.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: 4. 1 29.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: 1 15.0 0.2 0.0 <total>
+
+# CHECK: [31] Code Region - G32
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 4003
+# CHECK-NEXT: Total uOps: 3000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.12
+# CHECK-NEXT: Block RThroughput: 3.8
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: [0,1] .D=======eeeeeeeeER . . . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: [0,2] . D==============eeeeeeeeER . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: [0,3] . D=====================eeeeeeeeER. . . ld3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: [0,4] . D============================eeeeeeeeER ld3 { v1.d, v2.d, v3.d }[0], [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: 2. 1 15.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: 3. 1 22.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: 4. 1 29.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: 1 15.0 0.2 0.0 <total>
+
+# CHECK: [32] Code Region - G33
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 510
+# CHECK-NEXT: Total uOps: 3200
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 6.27
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: [0,1] .DeeeeeeeeER . ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24
+# CHECK-NEXT: [0,2] . DeeeeeeeeER . ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12
+# CHECK-NEXT: [0,3] . DeeeeeeeeER. ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6
+# CHECK-NEXT: [0,4] . DeeeeeeeeER ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24
+# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12
+# CHECK-NEXT: 3. 1 1.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6
+# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12
+# CHECK-NEXT: 1 1.0 0.2 0.0 <total>
+
+# CHECK: [33] Code Region - G34
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 510
+# CHECK-NEXT: Total uOps: 3300
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 6.47
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 4.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3
+# CHECK-NEXT: [0,1] .DeeeeeeeeER . ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6
+# CHECK-NEXT: [0,2] . DeeeeeeeeER . ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3
+# CHECK-NEXT: [0,3] . DeeeeeeeeER. ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: [0,4] . DeeeeeeeeER ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6
+# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3
+# CHECK-NEXT: 3. 1 1.0 0.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 1 1.0 0.2 0.0 <total>
+
+# CHECK: [34] Code Region - G35
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 510
+# CHECK-NEXT: Total uOps: 3200
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 6.27
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,1] .DeeeeeeeeER . ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,2] . DeeeeeeeeER . ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,3] . DeeeeeeeeER. ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,4] . DeeeeeeeeER ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 3. 1 1.0 0.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 1 1.0 0.2 0.0 <total>
+
+# CHECK: [35] Code Region - G36
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 710
+# CHECK-NEXT: Total uOps: 4500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 6.34
+# CHECK-NEXT: IPC: 0.70
+# CHECK-NEXT: Block RThroughput: 7.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER .. ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,1] .DeeeeeeeeeER .. ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,2] . DeeeeeeeeER .. ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,3] . DeeeeeeeeER .. ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,4] . D=eeeeeeeeeER ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 3. 1 1.0 0.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: 4. 1 2.0 1.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: 1 1.2 0.4 0.0 <total>
+
+# CHECK: [36] Code Region - G37
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 811
+# CHECK-NEXT: Total uOps: 4900
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 6.04
+# CHECK-NEXT: IPC: 0.62
+# CHECK-NEXT: Block RThroughput: 8.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234567
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,1] .DeeeeeeeeeER . . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,2] . DeeeeeeeeeER. . ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,3] . DeeeeeeeeeER. ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,4] . .D=eeeeeeeeER ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 2. 1 1.0 1.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 3. 1 1.0 1.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 4. 1 2.0 1.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 1 1.2 0.8 0.0 <total>
+
+# CHECK: [37] Code Region - G38
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 809
+# CHECK-NEXT: Total uOps: 4900
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 6.06
+# CHECK-NEXT: IPC: 0.62
+# CHECK-NEXT: Block RThroughput: 8.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER .. ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,1] .DeeeeeeeeeER .. ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,2] . DeeeeeeeeER .. ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,3] . DeeeeeeeeeER.. ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,4] . DeeeeeeeeeER ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 3. 1 1.0 0.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 4. 1 1.0 1.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 1 1.0 0.4 0.0 <total>
+
+# CHECK: [38] Code Region - G39
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 4003
+# CHECK-NEXT: Total uOps: 4000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.00
+# CHECK-NEXT: IPC: 0.12
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: [0,1] .D=======eeeeeeeeER . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: [0,2] . D==============eeeeeeeeER . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: [0,3] . D=====================eeeeeeeeER. . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: [0,4] . D============================eeeeeeeeER ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: 2. 1 15.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: 3. 1 22.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: 4. 1 29.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: 1 15.0 0.2 0.0 <total>
+
+# CHECK: [39] Code Region - G40
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 4003
+# CHECK-NEXT: Total uOps: 4000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.00
+# CHECK-NEXT: IPC: 0.12
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: [0,1] .D=======eeeeeeeeER . . . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: [0,2] . D==============eeeeeeeeER . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: [0,3] . D=====================eeeeeeeeER. . . ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: [0,4] . D============================eeeeeeeeER ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: 2. 1 15.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: 3. 1 22.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: 4. 1 29.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: 1 15.0 0.2 0.0 <total>
+
+# CHECK: [40] Code Region - G41
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 1903
+# CHECK-NEXT: Total uOps: 4100
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.15
+# CHECK-NEXT: IPC: 0.26
+# CHECK-NEXT: Block RThroughput: 5.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01
+
+# CHECK: [0,0] DeeeeeeeeER . .. ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: [0,1] .D=======eeeeeeeeER .. ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: [0,2] . D=======eeeeeeeeER.. ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: [0,3] . D=======eeeeeeeeER. ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
+# CHECK-NEXT: [0,4] . D=======eeeeeeeeER ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: 3. 1 8.0 0.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
+# CHECK-NEXT: 4. 1 8.0 0.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
+# CHECK-NEXT: 1 6.6 0.2 0.0 <total>
+
+# CHECK: [41] Code Region - G42
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 659
+# CHECK-NEXT: Total uOps: 4300
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 6.53
+# CHECK-NEXT: IPC: 0.76
+# CHECK-NEXT: Block RThroughput: 6.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012345
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
+# CHECK-NEXT: [0,1] .DeeeeeeeeER . ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
+# CHECK-NEXT: [0,2] . DeeeeeeeeER . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
+# CHECK-NEXT: [0,3] . DeeeeeeeeER . ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
+# CHECK-NEXT: [0,4] . D=eeeeeeeeER ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
+# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
+# CHECK-NEXT: 3. 1 1.0 0.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
+# CHECK-NEXT: 4. 1 2.0 1.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
+# CHECK-NEXT: 1 1.2 0.4 0.0 <total>
+
+# CHECK: [42] Code Region - G43
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 610
+# CHECK-NEXT: Total uOps: 4200
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 6.89
+# CHECK-NEXT: IPC: 0.82
+# CHECK-NEXT: Block RThroughput: 5.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012345
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: [0,1] .DeeeeeeeeER . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,2] . DeeeeeeeeER . ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,3] . DeeeeeeeeER . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,4] . D=eeeeeeeeER ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 3. 1 1.0 0.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 4. 1 2.0 1.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 1 1.2 0.4 0.0 <total>
+
+# CHECK: [43] Code Region - G44
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 508
+# CHECK-NEXT: Total uOps: 3400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 6.69
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 4.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,1] .DeeeeeeeeER. ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,2] . DeeeeeeeeER ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,3] . DeeeeeeE-R ldp s1, s2, [x27], #248
+# CHECK-NEXT: [0,4] . D=eeeeeeER ldp d1, d2, [x27], #496
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 3. 1 1.0 0.0 1.0 ldp s1, s2, [x27], #248
+# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ldp d1, d2, [x27], #496
+# CHECK-NEXT: 1 1.2 0.2 0.2 <total>
+
+# CHECK: [44] Code Region - G45
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 507
+# CHECK-NEXT: Total uOps: 2300
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.54
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 2.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER .. ldp q1, q2, [x27], #992
+# CHECK-NEXT: [0,1] D=eeeeeeER.. ldp s1, s2, [x27, #248]!
+# CHECK-NEXT: [0,2] .D=eeeeeeER. ldp d1, d2, [x27, #496]!
+# CHECK-NEXT: [0,3] .D==eeeeeeER ldp q1, q2, [x27, #992]!
+# CHECK-NEXT: [0,4] . D==eeeeE-R ldp w1, w2, [x27], #248
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldp q1, q2, [x27], #992
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ldp s1, s2, [x27, #248]!
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldp d1, d2, [x27, #496]!
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ldp q1, q2, [x27, #992]!
+# CHECK-NEXT: 4. 1 3.0 0.0 1.0 ldp w1, w2, [x27], #248
+# CHECK-NEXT: 1 2.2 0.2 0.2 <total>
+
+# CHECK: [45] Code Region - G46
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 507
+# CHECK-NEXT: Total uOps: 2100
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.14
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 3.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER .. ldp x1, x2, [x27], #496
+# CHECK-NEXT: [0,1] D=eeeeER .. ldp w1, w2, [x27, #248]!
+# CHECK-NEXT: [0,2] D==eeeeER .. ldp x1, x2, [x27, #496]!
+# CHECK-NEXT: [0,3] .D==eeeeeER. ldpsw x1, x2, [x27], #248
+# CHECK-NEXT: [0,4] . D==eeeeeER ldpsw x1, x2, [x27, #248]!
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldp x1, x2, [x27], #496
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ldp w1, w2, [x27, #248]!
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ldp x1, x2, [x27, #496]!
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ldpsw x1, x2, [x27], #248
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldpsw x1, x2, [x27, #248]!
+# CHECK-NEXT: 1 2.4 0.2 0.0 <total>
+
+# CHECK: [46] Code Region - G47
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 508
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.95
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 1.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . ldr b1, [x27], #254
+# CHECK-NEXT: [0,1] D=eeeeeeER. . ldr h1, [x27], #254
+# CHECK-NEXT: [0,2] D==eeeeeeER . ldr s1, [x27], #254
+# CHECK-NEXT: [0,3] .D==eeeeeeER. ldr d1, [x27], #254
+# CHECK-NEXT: [0,4] .D===eeeeeeER ldr q1, [x27], #254
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr b1, [x27], #254
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ldr h1, [x27], #254
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ldr s1, [x27], #254
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ldr d1, [x27], #254
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ldr q1, [x27], #254
+# CHECK-NEXT: 1 2.6 0.2 0.0 <total>
+
+# CHECK: [47] Code Region - G48
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 508
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.95
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 1.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . ldr b1, [x27, #254]!
+# CHECK-NEXT: [0,1] D=eeeeeeER. . ldr h1, [x27, #254]!
+# CHECK-NEXT: [0,2] D==eeeeeeER . ldr s1, [x27, #254]!
+# CHECK-NEXT: [0,3] .D==eeeeeeER. ldr d1, [x27, #254]!
+# CHECK-NEXT: [0,4] .D===eeeeeeER ldr q1, [x27, #254]!
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr b1, [x27, #254]!
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ldr h1, [x27, #254]!
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ldr s1, [x27, #254]!
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ldr d1, [x27, #254]!
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ldr q1, [x27, #254]!
+# CHECK-NEXT: 1 2.6 0.2 0.0 <total>
+
+# CHECK: [48] Code Region - G49
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 506
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.98
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 1.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . ldr w1, [x27], #254
+# CHECK-NEXT: [0,1] D=eeeeER . ldr x1, [x27], #254
+# CHECK-NEXT: [0,2] D==eeeeER . ldr w1, [x27, #254]!
+# CHECK-NEXT: [0,3] D===eeeeER. ldr x1, [x27, #254]!
+# CHECK-NEXT: [0,4] D====eeeeER ldrb w1, [x27], #254
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr w1, [x27], #254
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ldr x1, [x27], #254
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ldr w1, [x27, #254]!
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ldr x1, [x27, #254]!
+# CHECK-NEXT: 4. 1 5.0 0.0 0.0 ldrb w1, [x27], #254
+# CHECK-NEXT: 1 3.0 0.2 0.0 <total>
+
+# CHECK: [49] Code Region - G50
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 506
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.98
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 1.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . ldrb w1, [x27, #254]!
+# CHECK-NEXT: [0,1] D=eeeeER . ldrh w1, [x27], #254
+# CHECK-NEXT: [0,2] D==eeeeER . ldrh w1, [x27, #254]!
+# CHECK-NEXT: [0,3] D===eeeeER. ldrsb w1, [x27], #254
+# CHECK-NEXT: [0,4] D====eeeeER ldrsb x1, [x27], #254
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldrb w1, [x27, #254]!
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ldrh w1, [x27], #254
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ldrh w1, [x27, #254]!
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ldrsb w1, [x27], #254
+# CHECK-NEXT: 4. 1 5.0 0.0 0.0 ldrsb x1, [x27], #254
+# CHECK-NEXT: 1 3.0 0.2 0.0 <total>
+
+# CHECK: [50] Code Region - G51
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 506
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.98
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 1.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . ldrsb w1, [x27, #254]!
+# CHECK-NEXT: [0,1] D=eeeeER . ldrsb x1, [x27, #254]!
+# CHECK-NEXT: [0,2] D==eeeeER . ldrsh w1, [x27], #254
+# CHECK-NEXT: [0,3] D===eeeeER. ldrsh x1, [x27], #254
+# CHECK-NEXT: [0,4] D====eeeeER ldrsh w1, [x27, #254]!
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldrsb w1, [x27, #254]!
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ldrsb x1, [x27, #254]!
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ldrsh w1, [x27], #254
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ldrsh x1, [x27], #254
+# CHECK-NEXT: 4. 1 5.0 0.0 0.0 ldrsh w1, [x27, #254]!
+# CHECK-NEXT: 1 3.0 0.2 0.0 <total>
+
+# CHECK: [51] Code Region - G52
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 504
+# CHECK-NEXT: Total uOps: 1200
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.38
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 1.2
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeeeeER . ldrsh x1, [x27, #254]!
+# CHECK-NEXT: [0,1] D=eeeeER. ldrsw x1, [x27], #254
+# CHECK-NEXT: [0,2] D==eeeeER ldrsw x1, [x27, #254]!
+# CHECK-NEXT: [0,3] D===eeE-R st1 { v1.1d }, [x27], #8
+# CHECK-NEXT: [0,4] .D===eeER st1 { v1.2d }, [x27], #16
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldrsh x1, [x27, #254]!
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ldrsw x1, [x27], #254
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ldrsw x1, [x27, #254]!
+# CHECK-NEXT: 3. 1 4.0 0.0 1.0 st1 { v1.1d }, [x27], #8
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.2d }, [x27], #16
+# CHECK-NEXT: 1 2.8 0.2 0.2 <total>
+
+# CHECK: [52] Code Region - G53
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 504
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.98
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeeER. . st1 { v1.2s }, [x27], #8
+# CHECK-NEXT: [0,1] D=eeER . st1 { v1.4h }, [x27], #8
+# CHECK-NEXT: [0,2] D==eeER . st1 { v1.4s }, [x27], #16
+# CHECK-NEXT: [0,3] .D==eeER. st1 { v1.8b }, [x27], #8
+# CHECK-NEXT: [0,4] .D===eeER st1 { v1.8h }, [x27], #16
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2s }, [x27], #8
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.4h }, [x27], #8
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.4s }, [x27], #16
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 st1 { v1.8b }, [x27], #8
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.8h }, [x27], #16
+# CHECK-NEXT: 1 2.6 0.2 0.0 <total>
+
+# CHECK: [53] Code Region - G54
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 504
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.98
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeeER. . st1 { v1.16b }, [x27], #16
+# CHECK-NEXT: [0,1] D=eeER . st1 { v1.1d }, [x27], x28
+# CHECK-NEXT: [0,2] D==eeER . st1 { v1.2d }, [x27], x28
+# CHECK-NEXT: [0,3] .D==eeER. st1 { v1.2s }, [x27], x28
+# CHECK-NEXT: [0,4] .D===eeER st1 { v1.4h }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.16b }, [x27], #16
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.1d }, [x27], x28
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.2d }, [x27], x28
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 st1 { v1.2s }, [x27], x28
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.4h }, [x27], x28
+# CHECK-NEXT: 1 2.6 0.2 0.0 <total>
+
+# CHECK: [54] Code Region - G55
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 504
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.98
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeeER. . st1 { v1.4s }, [x27], x28
+# CHECK-NEXT: [0,1] D=eeER . st1 { v1.8b }, [x27], x28
+# CHECK-NEXT: [0,2] D==eeER . st1 { v1.8h }, [x27], x28
+# CHECK-NEXT: [0,3] .D==eeER. st1 { v1.16b }, [x27], x28
+# CHECK-NEXT: [0,4] .D===eeER st1 { v1.1d, v2.1d }, [x27], #16
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4s }, [x27], x28
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.8b }, [x27], x28
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.8h }, [x27], x28
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 st1 { v1.16b }, [x27], x28
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: 1 2.6 0.2 0.0 <total>
+
+# CHECK: [55] Code Region - G56
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 504
+# CHECK-NEXT: Total uOps: 1900
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.77
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 3.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeeER. . st1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,1] D=eeER . st1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,2] .D=eeER . st1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,3] .D==eeER. st1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,4] . D==eeER st1 { v1.8b, v2.8b }, [x27], #16
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: 1 2.2 0.2 0.0 <total>
+
+# CHECK: [56] Code Region - G57
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 504
+# CHECK-NEXT: Total uOps: 2100
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.17
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeeER. . st1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,1] D=eeER . st1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,2] .D=eeER . st1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: [0,3] .D==eeER. st1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,4] . D==eeER st1 { v1.2s, v2.2s }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 st1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 1 2.2 0.2 0.0 <total>
+
+# CHECK: [57] Code Region - G58
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 504
+# CHECK-NEXT: Total uOps: 2100
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.17
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeeER. . st1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,1] D=eeER . st1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,2] .D=eeER . st1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,3] .D==eeER. st1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,4] . D==eeER st1 { v1.16b, v2.16b }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 st1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 1 2.2 0.2 0.0 <total>
+
+# CHECK: [58] Code Region - G59
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 703
+# CHECK-NEXT: Total uOps: 2900
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.13
+# CHECK-NEXT: IPC: 0.71
+# CHECK-NEXT: Block RThroughput: 6.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . st1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: [0,1] .DeeER . st1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,2] . DeeER . st1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,3] . D==eeER. st1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,4] . D==eeER st1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: 3. 1 3.0 1.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: 1 1.8 0.4 0.0 <total>
+
+# CHECK: [59] Code Region - G60
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 703
+# CHECK-NEXT: Total uOps: 3100
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.41
+# CHECK-NEXT: IPC: 0.71
+# CHECK-NEXT: Block RThroughput: 6.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . st1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,1] .DeeER . st1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,2] . DeeER . st1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,3] . D=eeER. st1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: [0,4] . D=eeER st1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 3. 1 2.0 1.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 1 1.4 0.4 0.0 <total>
+
+# CHECK: [60] Code Region - G61
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 703
+# CHECK-NEXT: Total uOps: 2900
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.13
+# CHECK-NEXT: IPC: 0.71
+# CHECK-NEXT: Block RThroughput: 6.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . st1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,1] D=eeER . st1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,2] .D=eeER . st1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,3] . D=eeER . st1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,4] . D==eeER st1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 3. 1 2.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 4. 1 3.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 1 2.0 0.4 0.0 <total>
+
+# CHECK: [61] Code Region - G62
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 704
+# CHECK-NEXT: Total uOps: 3100
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.40
+# CHECK-NEXT: IPC: 0.71
+# CHECK-NEXT: Block RThroughput: 6.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . st1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,1] .DeeER . st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: [0,2] . D=eeER . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,3] . D=eeER . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,4] . D===eeER st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: 2. 1 2.0 1.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 3. 1 2.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 4. 1 4.0 1.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: 1 2.0 0.6 0.0 <total>
+
+# CHECK: [62] Code Region - G63
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 804
+# CHECK-NEXT: Total uOps: 3700
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.60
+# CHECK-NEXT: IPC: 0.62
+# CHECK-NEXT: Block RThroughput: 8.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. .. st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,1] .DeeER .. st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,2] . D=eeER .. st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,3] . D=eeER .. st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,4] . D===eeER st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: 2. 1 2.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 3. 1 2.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 4. 1 4.0 2.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: 1 2.0 0.8 0.0 <total>
+
+# CHECK: [63] Code Region - G64
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 703
+# CHECK-NEXT: Total uOps: 3300
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.69
+# CHECK-NEXT: IPC: 0.71
+# CHECK-NEXT: Block RThroughput: 7.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,1] .DeeER . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,2] .D==eeER . st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,3] . D==eeER. st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,4] . D==eeER st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 2. 1 3.0 1.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 1 2.2 0.4 0.0 <total>
+
+# CHECK: [64] Code Region - G65
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 706
+# CHECK-NEXT: Total uOps: 3000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.25
+# CHECK-NEXT: IPC: 0.71
+# CHECK-NEXT: Block RThroughput: 7.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,1] .DeeER . . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,2] . D==eeeeER . st1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: [0,3] . D===eeeeER. st1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: [0,4] . D===eeeeER st1 { v1.b }[0], [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 2. 1 3.0 2.0 0.0 st1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: 1 2.6 0.6 0.0 <total>
+
+# CHECK: [65] Code Region - G66
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 506
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.95
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . st1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: [0,1] D=eeeeER . st1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: [0,2] .D=eeeeER . st1 { v1.h }[4], [x27], #2
+# CHECK-NEXT: [0,3] .D==eeeeER. st1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: [0,4] . D==eeeeER st1 { v1.h }[4], [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.h }[4], [x27], #2
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 st1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.h }[4], [x27], x28
+# CHECK-NEXT: 1 2.2 0.2 0.0 <total>
+
+# CHECK: [66] Code Region - G67
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 605
+# CHECK-NEXT: Total uOps: 2300
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.80
+# CHECK-NEXT: IPC: 0.83
+# CHECK-NEXT: Block RThroughput: 6.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . st1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: [0,1] D=eeeeER . st1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: [0,2] .D=eeeeER . st1 { v1.d }[0], [x27], #8
+# CHECK-NEXT: [0,3] .D==eeeeER. st1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: [0,4] . D==eeeeER st2 { v1.2d, v2.2d }, [x27], #32
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.d }[0], [x27], #8
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 st1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: 1 2.2 0.2 0.0 <total>
+
+# CHECK: [67] Code Region - G68
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 705
+# CHECK-NEXT: Total uOps: 2600
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.69
+# CHECK-NEXT: IPC: 0.71
+# CHECK-NEXT: Block RThroughput: 7.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER .. st2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,1] D=eeeeER .. st2 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,2] .D=eeeeER .. st2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,3] . D=eeeeER.. st2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,4] . D==eeeeER st2 { v1.8h, v2.8h }, [x27], #32
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: 3. 1 2.0 0.0 0.0 st2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: 4. 1 3.0 1.0 0.0 st2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: 1 2.0 0.4 0.0 <total>
+
+# CHECK: [68] Code Region - G69
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 805
+# CHECK-NEXT: Total uOps: 2900
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.60
+# CHECK-NEXT: IPC: 0.62
+# CHECK-NEXT: Block RThroughput: 8.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . . st2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,1] .DeeeeER . . st2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,2] . D==eeeeER . st2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,3] . D===eeeeER. st2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,4] . D===eeeeER st2 { v1.4s, v2.4s }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 2. 1 3.0 2.0 0.0 st2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 1 2.6 0.6 0.0 <total>
+
+# CHECK: [69] Code Region - G70
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 706
+# CHECK-NEXT: Total uOps: 2600
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.68
+# CHECK-NEXT: IPC: 0.71
+# CHECK-NEXT: Block RThroughput: 7.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . . st2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,1] .DeeeeER . . st2 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,2] . DeeeeER . . st2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,3] . D==eeeeER. st2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: [0,4] . D===eeeeER st2 { v1.b, v2.b }[8], [x27], #2
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 st2 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 3. 1 3.0 2.0 0.0 st2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], #2
+# CHECK-NEXT: 1 2.0 0.6 0.0 <total>
+
+# CHECK: [70] Code Region - G71
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 506
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.95
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . st2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: [0,1] D=eeeeER . st2 { v1.b, v2.b }[8], [x27], x28
+# CHECK-NEXT: [0,2] .D=eeeeER . st2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: [0,3] .D==eeeeER. st2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: [0,4] . D==eeeeER st2 { v1.h, v2.h }[0], [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], x28
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 st2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], x28
+# CHECK-NEXT: 1 2.2 0.2 0.0 <total>
+
+# CHECK: [71] Code Region - G72
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 506
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.95
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . st2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: [0,1] D=eeeeER . st2 { v1.s, v2.s }[0], [x27], #8
+# CHECK-NEXT: [0,2] .D=eeeeER . st2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: [0,3] .D==eeeeER. st2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: [0,4] . D==eeeeER st2 { v1.d, v2.d }[0], [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], #8
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], x28
+# CHECK-NEXT: 1 2.2 0.2 0.0 <total>
+
+# CHECK: [72] Code Region - G73
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 807
+# CHECK-NEXT: Total uOps: 3000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.72
+# CHECK-NEXT: IPC: 0.62
+# CHECK-NEXT: Block RThroughput: 7.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeER . . . st2g x26, [x27], #4064
+# CHECK-NEXT: [0,1] D=eER. . . st2g x26, [x27, #4064]!
+# CHECK-NEXT: [0,2] .D=eeeeeeER . st3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,3] . D=eeeeeER . st3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,4] . D====eeeeeER st3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2g x26, [x27], #4064
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st2g x26, [x27, #4064]!
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: 3. 1 2.0 0.0 0.0 st3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: 4. 1 5.0 3.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: 1 2.4 0.8 0.0 <total>
+
+# CHECK: [73] Code Region - G74
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 1405
+# CHECK-NEXT: Total uOps: 4700
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.35
+# CHECK-NEXT: IPC: 0.36
+# CHECK-NEXT: Block RThroughput: 14.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . st3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,1] .DeeeeeER . . . st3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,2] . D===eeeeeeER . . st3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,3] . D===eeeeeeER. . st3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,4] . D=======eeeeeeER st3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: 2. 1 4.0 3.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 4. 1 8.0 4.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 1 3.6 1.6 0.0 <total>
+
+# CHECK: [74] Code Region - G75
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 1206
+# CHECK-NEXT: Total uOps: 4100
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.40
+# CHECK-NEXT: IPC: 0.41
+# CHECK-NEXT: Block RThroughput: 12.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234567
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeER . . . st3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,1] .DeeeeeER . . . st3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,2] . D==eeeeeeER . . st3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,3] . D==eeeeeER . . st3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,4] . D=====eeeeeeER st3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 2. 1 3.0 2.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 4. 1 6.0 3.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 1 2.8 1.2 0.0 <total>
+
+# CHECK: [75] Code Region - G76
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 1106
+# CHECK-NEXT: Total uOps: 3800
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.44
+# CHECK-NEXT: IPC: 0.45
+# CHECK-NEXT: Block RThroughput: 11.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER . .. st3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,1] .DeeeeeER . .. st3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: [0,2] . D===eeeeeER .. st3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: [0,3] . D===eeeeeER .. st3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: [0,4] . D=====eeeeeER st3 { v1.b, v2.b, v3.b }[8], [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: 2. 1 4.0 3.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: 4. 1 6.0 2.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: 1 3.2 1.2 0.0 <total>
+
+# CHECK: [76] Code Region - G77
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 1005
+# CHECK-NEXT: Total uOps: 3500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.48
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012345
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeER . . st3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: [0,1] .DeeeeeER . . st3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: [0,2] . D==eeeeeER . st3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: [0,3] . D==eeeeeER . st3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: [0,4] . D====eeeeeER st3 { v1.s, v2.s, v3.s }[0], [x27], #12
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: 2. 1 3.0 2.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: 4. 1 5.0 2.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: 1 2.6 1.0 0.0 <total>
+
+# CHECK: [77] Code Region - G78
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 1304
+# CHECK-NEXT: Total uOps: 4300
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.30
+# CHECK-NEXT: IPC: 0.38
+# CHECK-NEXT: Block RThroughput: 13.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeER . .. st3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: [0,1] .DeeeeeER . .. st3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: [0,2] . D==eeeeeER .. st3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: [0,3] . D==eeeeeER .. st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,4] . D===eeeeeeER st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: 2. 1 3.0 2.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 4. 1 4.0 2.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 1 2.4 1.0 0.0 <total>
+
+# CHECK: [78] Code Region - G79
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 2399
+# CHECK-NEXT: Total uOps: 6900
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.88
+# CHECK-NEXT: IPC: 0.21
+# CHECK-NEXT: Block RThroughput: 24.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012
+
+# CHECK: [0,0] DeeeeeeER . . . . st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,1] .DeeeeeeeER . . . st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,2] . D===eeeeeeER. . . st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,3] . D========eeeeeeeER. st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,4] . .D=======eeeeeeeER st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: 2. 1 4.0 4.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: 3. 1 9.0 5.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 4. 1 8.0 0.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 1 4.6 2.0 0.0 <total>
+
+# CHECK: [79] Code Region - G80
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 1904
+# CHECK-NEXT: Total uOps: 5700
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.99
+# CHECK-NEXT: IPC: 0.26
+# CHECK-NEXT: Block RThroughput: 19.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012
+
+# CHECK: [0,0] DeeeeeER . . . . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeeeeeER . . . st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,2] . D=====eeeeeeER . . st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,3] . D=====eeeeeeeER . . st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,4] . .D========eeeeeeER st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 1. 1 1.0 1.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 2. 1 6.0 5.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 6.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 4. 1 9.0 4.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 1 4.6 2.2 0.0 <total>
+
+# CHECK: [80] Code Region - G81
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 1658
+# CHECK-NEXT: Total uOps: 4900
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.96
+# CHECK-NEXT: IPC: 0.30
+# CHECK-NEXT: Block RThroughput: 16.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeER. . . . st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeeeeeeER . . . st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,2] . D========eeeeeeER . st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: [0,3] . D==========eeeeeeER. st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: [0,4] . D==========eeeeeeER st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 1. 1 1.0 1.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 2. 1 9.0 9.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: 3. 1 11.0 1.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: 4. 1 11.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: 1 6.6 2.4 0.0 <total>
+
+# CHECK: [81] Code Region - G82
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 757
+# CHECK-NEXT: Total uOps: 2500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.30
+# CHECK-NEXT: IPC: 0.66
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: [0,1] D=eeeeeeER. . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: [0,2] .D==eeeeeeER . st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: [0,3] .D===eeeeeeER . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: [0,4] . D====eeeeeeER st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: 2. 1 3.0 1.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: 4. 1 5.0 1.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: 1 3.0 0.6 0.0 <total>
+
+# CHECK: [82] Code Region - G83
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 704
+# CHECK-NEXT: Total uOps: 2700
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.84
+# CHECK-NEXT: IPC: 0.71
+# CHECK-NEXT: Block RThroughput: 7.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER . st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: [0,1] D=eeeeeeER. st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: [0,2] .D==eeeeER. st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: [0,3] . D==eeeeER st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: [0,4] . D===eE--R stg x26, [x27], #4064
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: 2. 1 3.0 1.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: 4. 1 4.0 0.0 2.0 stg x26, [x27], #4064
+# CHECK-NEXT: 1 2.6 0.4 0.4 <total>
+
+# CHECK: [83] Code Region - G84
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 504
+# CHECK-NEXT: Total uOps: 1700
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.37
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeER . . stg x26, [x27, #4064]!
+# CHECK-NEXT: [0,1] D=eER. . stgp x1, x2, [x27], #992
+# CHECK-NEXT: [0,2] D==eER . stgp x1, x2, [x27, #992]!
+# CHECK-NEXT: [0,3] .D==eeER. stp s1, s2, [x27], #248
+# CHECK-NEXT: [0,4] .D===eeER stp d1, d2, [x27], #496
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 stg x26, [x27, #4064]!
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 stgp x1, x2, [x27], #992
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 stgp x1, x2, [x27, #992]!
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 stp s1, s2, [x27], #248
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 stp d1, d2, [x27], #496
+# CHECK-NEXT: 1 2.6 0.2 0.0 <total>
+
+# CHECK: [84] Code Region - G85
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 703
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.84
+# CHECK-NEXT: IPC: 0.71
+# CHECK-NEXT: Block RThroughput: 3.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . stp q1, q2, [x27], #992
+# CHECK-NEXT: [0,1] D==eeER . stp s1, s2, [x27, #248]!
+# CHECK-NEXT: [0,2] .D==eeER . stp d1, d2, [x27, #496]!
+# CHECK-NEXT: [0,3] .D===eeER. stp q1, q2, [x27, #992]!
+# CHECK-NEXT: [0,4] . D====eER stp w1, w2, [x27], #248
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 stp q1, q2, [x27], #992
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 stp s1, s2, [x27, #248]!
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 stp d1, d2, [x27, #496]!
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 stp q1, q2, [x27, #992]!
+# CHECK-NEXT: 4. 1 5.0 0.0 0.0 stp w1, w2, [x27], #248
+# CHECK-NEXT: 1 3.2 0.2 0.0 <total>
+
+# CHECK: [85] Code Region - G86
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 504
+# CHECK-NEXT: Total uOps: 1700
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.37
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeER . . stp x1, x2, [x27], #496
+# CHECK-NEXT: [0,1] D=eER. . stp w1, w2, [x27, #248]!
+# CHECK-NEXT: [0,2] D==eER . stp x1, x2, [x27, #496]!
+# CHECK-NEXT: [0,3] .D==eeER. str b1, [x27], #254
+# CHECK-NEXT: [0,4] .D===eeER str h1, [x27], #254
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 stp x1, x2, [x27], #496
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 stp w1, w2, [x27, #248]!
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 stp x1, x2, [x27, #496]!
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 str b1, [x27], #254
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 str h1, [x27], #254
+# CHECK-NEXT: 1 2.6 0.2 0.0 <total>
+
+# CHECK: [86] Code Region - G87
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 504
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.97
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeeER. . str s1, [x27], #254
+# CHECK-NEXT: [0,1] D=eeER . str d1, [x27], #254
+# CHECK-NEXT: [0,2] .D=eeER . str q1, [x27], #254
+# CHECK-NEXT: [0,3] .D==eeER. str b1, [x27, #254]!
+# CHECK-NEXT: [0,4] . D==eeER str h1, [x27, #254]!
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 str s1, [x27], #254
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 str d1, [x27], #254
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 str q1, [x27], #254
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 str b1, [x27, #254]!
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 str h1, [x27, #254]!
+# CHECK-NEXT: 1 2.2 0.2 0.0 <total>
+
+# CHECK: [87] Code Region - G88
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 503
+# CHECK-NEXT: Total uOps: 1800
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.58
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 01234567
+
+# CHECK: [0,0] DeeER. . str s1, [x27, #254]!
+# CHECK-NEXT: [0,1] D=eeER . str d1, [x27, #254]!
+# CHECK-NEXT: [0,2] .D=eeER. str q1, [x27, #254]!
+# CHECK-NEXT: [0,3] .D==eER. str w1, [x27], #254
+# CHECK-NEXT: [0,4] .D===eER str x1, [x27], #254
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 str s1, [x27, #254]!
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 str d1, [x27, #254]!
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 str q1, [x27, #254]!
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 str w1, [x27], #254
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 str x1, [x27], #254
+# CHECK-NEXT: 1 2.4 0.2 0.0 <total>
+
+# CHECK: [88] Code Region - G89
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 503
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.98
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 01234567
+
+# CHECK: [0,0] DeER . . str w1, [x27, #254]!
+# CHECK-NEXT: [0,1] D=eER. . str x1, [x27, #254]!
+# CHECK-NEXT: [0,2] D==eER . strb w1, [x27], #254
+# CHECK-NEXT: [0,3] .D==eER. strb w1, [x27, #254]!
+# CHECK-NEXT: [0,4] .D===eER strh w1, [x27], #254
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 str w1, [x27, #254]!
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 str x1, [x27, #254]!
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 strb w1, [x27], #254
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 strb w1, [x27, #254]!
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 strh w1, [x27], #254
+# CHECK-NEXT: 1 2.6 0.2 0.0 <total>
+
+# CHECK: [89] Code Region - G90
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 503
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.98
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 01234567
+
+# CHECK: [0,0] DeER . . strh w1, [x27, #254]!
+# CHECK-NEXT: [0,1] D=eER. . stz2g x26, [x27], #4064
+# CHECK-NEXT: [0,2] D==eER . stz2g x26, [x27, #4064]!
+# CHECK-NEXT: [0,3] .D==eER. stzg x26, [x27], #4064
+# CHECK-NEXT: [0,4] .D===eER stzg x26, [x27, #4064]!
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 strh w1, [x27, #254]!
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 stz2g x26, [x27], #4064
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 stz2g x26, [x27, #4064]!
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 stzg x26, [x27], #4064
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 stzg x26, [x27, #4064]!
+# CHECK-NEXT: 1 2.6 0.2 0.0 <total>
+
+# CHECK: [90] Code Region - G91
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 200
+# CHECK-NEXT: Total Cycles: 110
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.64
+# CHECK-NEXT: IPC: 1.82
+# CHECK-NEXT: Block RThroughput: 0.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . ldr x1, [x27], #254
+# CHECK-NEXT: [0,1] D====eeeeER ldr x2, [x1], #254
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr x1, [x27], #254
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 ldr x2, [x1], #254
+# CHECK-NEXT: 1 3.0 0.5 0.0 <total>
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-zero-lat-movs.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-zero-lat-movs.s
new file mode 100644
index 0000000..1eef230
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-zero-lat-movs.s
@@ -0,0 +1,83 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=neoverse-v3 -instruction-tables < %s | FileCheck %s
+
+mov x1, #0
+mov x1, xzr
+mov w1, #0
+mov w1, wzr
+fmov h1, wzr
+fmov h1, xzr
+fmov s1, wzr
+fmov d1, xzr
+movi d1, #0
+movi v1.2d, #0
+mov w1, w2
+mov x1, x2
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 0 0.10 mov x1, #0
+# CHECK-NEXT: 1 0 0.10 mov x1, xzr
+# CHECK-NEXT: 1 0 0.10 mov w1, #0
+# CHECK-NEXT: 1 0 0.10 mov w1, wzr
+# CHECK-NEXT: 1 0 0.10 fmov h1, wzr
+# CHECK-NEXT: 1 0 0.10 fmov h1, xzr
+# CHECK-NEXT: 1 0 0.10 fmov s1, wzr
+# CHECK-NEXT: 1 0 0.10 fmov d1, xzr
+# CHECK-NEXT: 1 0 0.10 movi d1, #0000000000000000
+# CHECK-NEXT: 1 0 0.10 movi v1.2d, #0000000000000000
+# CHECK-NEXT: 1 0 0.10 mov w1, w2
+# CHECK-NEXT: 1 0 0.10 mov x1, x2
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - V3UnitB
+# CHECK-NEXT: [0.1] - V3UnitB
+# CHECK-NEXT: [0.2] - V3UnitB
+# CHECK-NEXT: [1.0] - V3UnitD
+# CHECK-NEXT: [1.1] - V3UnitD
+# CHECK-NEXT: [2.0] - V3UnitFlg
+# CHECK-NEXT: [2.1] - V3UnitFlg
+# CHECK-NEXT: [2.2] - V3UnitFlg
+# CHECK-NEXT: [2.3] - V3UnitFlg
+# CHECK-NEXT: [3.0] - V3UnitL12
+# CHECK-NEXT: [3.1] - V3UnitL12
+# CHECK-NEXT: [4] - V3UnitLS0
+# CHECK-NEXT: [5] - V3UnitM0
+# CHECK-NEXT: [6] - V3UnitM1
+# CHECK-NEXT: [7] - V3UnitS0
+# CHECK-NEXT: [8] - V3UnitS1
+# CHECK-NEXT: [9] - V3UnitS2
+# CHECK-NEXT: [10] - V3UnitS3
+# CHECK-NEXT: [11] - V3UnitS4
+# CHECK-NEXT: [12] - V3UnitS5
+# CHECK-NEXT: [13] - V3UnitST1
+# CHECK-NEXT: [14] - V3UnitV0
+# CHECK-NEXT: [15] - V3UnitV1
+# CHECK-NEXT: [16] - V3UnitV2
+# CHECK-NEXT: [17] - V3UnitV3
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] [17]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] [17] Instructions:
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - - - mov x1, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - - - mov x1, xzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - - - mov w1, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - - - mov w1, wzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - - - fmov h1, wzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - - - fmov h1, xzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - - - fmov s1, wzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - - - fmov d1, xzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - - - movi d1, #0000000000000000
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - - - movi v1.2d, #0000000000000000
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - - - mov w1, w2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - - - mov x1, x2
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-basic-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-basic-instructions.s
new file mode 100644
index 0000000..7ab2be5
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-basic-instructions.s
@@ -0,0 +1,3777 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=neoverse-v3ae -instruction-tables < %s | FileCheck %s
+
+#------------------------------------------------------------------------------
+# Add/sub (immediate)
+#------------------------------------------------------------------------------
+
+add w2, w3, #4095
+add w30, w29, #1, lsl #12
+add w13, w5, #4095, lsl #12
+add x5, x7, #1638
+add w20, wsp, #801
+add wsp, wsp, #1104
+add wsp, w30, #4084
+add x0, x24, #291
+add x3, x24, #4095, lsl #12
+add x8, sp, #1074
+add sp, x29, #3816
+sub w0, wsp, #4077
+sub w4, w20, #546, lsl #12
+sub sp, sp, #288
+sub wsp, w19, #16
+adds w13, w23, #291, lsl #12
+cmn w2, #4095
+adds w20, wsp, #0
+cmn x3, #1, lsl #12
+cmp sp, #20, lsl #12
+cmp x30, #4095
+subs x4, sp, #3822
+cmn w3, #291, lsl #12
+cmn wsp, #1365
+cmn sp, #1092, lsl #12
+mov sp, x30
+mov wsp, w20
+mov x11, sp
+mov w24, wsp
+
+#------------------------------------------------------------------------------
+# Add-subtract (shifted register)
+#------------------------------------------------------------------------------
+
+add w3, w5, w7
+add wzr, w3, w5
+add w20, wzr, w4
+add w4, w6, wzr
+add w11, w13, w15
+add w9, w3, wzr, lsl #10
+add w17, w29, w20, lsl #31
+add w21, w22, w23, lsr #0
+add w24, w25, w26, lsr #18
+add w27, w28, w29, lsr #31
+add w2, w3, w4, asr #0
+add w5, w6, w7, asr #21
+add w8, w9, w10, asr #31
+add x3, x5, x7
+add xzr, x3, x5
+add x20, xzr, x4
+add x4, x6, xzr
+add x11, x13, x15
+add x9, x3, xzr, lsl #10
+add x17, x29, x20, lsl #63
+add x21, x22, x23, lsr #0
+add x24, x25, x26, lsr #18
+add x27, x28, x29, lsr #63
+add x2, x3, x4, asr #0
+add x5, x6, x7, asr #21
+add x8, x9, x10, asr #63
+adds w3, w5, w7
+cmn w3, w5
+adds w20, wzr, w4
+adds w4, w6, wzr
+adds w11, w13, w15
+adds w9, w3, wzr, lsl #10
+adds w17, w29, w20, lsl #31
+adds w21, w22, w23, lsr #0
+adds w24, w25, w26, lsr #18
+adds w27, w28, w29, lsr #31
+adds w2, w3, w4, asr #0
+adds w5, w6, w7, asr #21
+adds w8, w9, w10, asr #31
+adds x3, x5, x7
+cmn x3, x5
+adds x20, xzr, x4
+adds x4, x6, xzr
+adds x11, x13, x15
+adds x9, x3, xzr, lsl #10
+adds x17, x29, x20, lsl #63
+adds x21, x22, x23, lsr #0
+adds x24, x25, x26, lsr #18
+adds x27, x28, x29, lsr #63
+adds x2, x3, x4, asr #0
+adds x5, x6, x7, asr #21
+adds x8, x9, x10, asr #63
+sub w3, w5, w7
+sub wzr, w3, w5
+sub w4, w6, wzr
+sub w11, w13, w15
+sub w9, w3, wzr, lsl #10
+sub w17, w29, w20, lsl #31
+sub w21, w22, w23, lsr #0
+sub w24, w25, w26, lsr #18
+sub w27, w28, w29, lsr #31
+sub w2, w3, w4, asr #0
+sub w5, w6, w7, asr #21
+sub w8, w9, w10, asr #31
+sub x3, x5, x7
+sub xzr, x3, x5
+sub x4, x6, xzr
+sub x11, x13, x15
+sub x9, x3, xzr, lsl #10
+sub x17, x29, x20, lsl #63
+sub x21, x22, x23, lsr #0
+sub x24, x25, x26, lsr #18
+sub x27, x28, x29, lsr #63
+sub x2, x3, x4, asr #0
+sub x5, x6, x7, asr #21
+sub x8, x9, x10, asr #63
+subs w3, w5, w7
+cmp w3, w5
+subs w4, w6, wzr
+subs w11, w13, w15
+subs w9, w3, wzr, lsl #10
+subs w17, w29, w20, lsl #31
+subs w21, w22, w23, lsr #0
+subs w24, w25, w26, lsr #18
+subs w27, w28, w29, lsr #31
+subs w2, w3, w4, asr #0
+subs w5, w6, w7, asr #21
+subs w8, w9, w10, asr #31
+subs x3, x5, x7
+cmp x3, x5
+subs x4, x6, xzr
+subs x11, x13, x15
+subs x9, x3, xzr, lsl #10
+subs x17, x29, x20, lsl #63
+subs x21, x22, x23, lsr #0
+subs x24, x25, x26, lsr #18
+subs x27, x28, x29, lsr #63
+subs x2, x3, x4, asr #0
+subs x5, x6, x7, asr #21
+subs x8, x9, x10, asr #63
+cmn wzr, w4
+cmn w5, wzr
+cmn w6, w7
+cmn w8, w9, lsl #15
+cmn w10, w11, lsl #31
+cmn w12, w13, lsr #0
+cmn w14, w15, lsr #21
+cmn w16, w17, lsr #31
+cmn w18, w19, asr #0
+cmn w20, w21, asr #22
+cmn w22, w23, asr #31
+cmn x0, x3
+cmn xzr, x4
+cmn x5, xzr
+cmn x6, x7
+cmn x8, x9, lsl #15
+cmn x10, x11, lsl #63
+cmn x12, x13, lsr #0
+cmn x14, x15, lsr #41
+cmn x16, x17, lsr #63
+cmn x18, x19, asr #0
+cmn x20, x21, asr #55
+cmn x22, x23, asr #63
+cmp w0, w3
+cmp wzr, w4
+cmp w5, wzr
+cmp w6, w7
+cmp w8, w9, lsl #15
+cmp w10, w11, lsl #31
+cmp w12, w13, lsr #0
+cmp w14, w15, lsr #21
+cmp w18, w19, asr #0
+cmp w20, w21, asr #22
+cmp w22, w23, asr #31
+cmp x0, x3
+cmp xzr, x4
+cmp x5, xzr
+cmp x6, x7
+cmp x8, x9, lsl #15
+cmp x10, x11, lsl #63
+cmp x12, x13, lsr #0
+cmp x14, x15, lsr #41
+cmp x16, x17, lsr #63
+cmp x18, x19, asr #0
+cmp x20, x21, asr #55
+cmp x22, x23, asr #63
+cmp wzr, w0
+cmp xzr, x0
+
+#------------------------------------------------------------------------------
+# Add-subtract (with carry)
+#------------------------------------------------------------------------------
+
+adc w29, w27, w25
+adc wzr, w3, w4
+adc w9, wzr, w10
+adc w20, w0, wzr
+adc x29, x27, x25
+adc xzr, x3, x4
+adc x9, xzr, x10
+adc x20, x0, xzr
+adcs w29, w27, w25
+adcs wzr, w3, w4
+adcs w9, wzr, w10
+adcs w20, w0, wzr
+adcs x29, x27, x25
+adcs xzr, x3, x4
+adcs x9, xzr, x10
+adcs x20, x0, xzr
+sbc w29, w27, w25
+sbc wzr, w3, w4
+ngc w9, w10
+sbc w20, w0, wzr
+sbc x29, x27, x25
+sbc xzr, x3, x4
+ngc x9, x10
+sbc x20, x0, xzr
+sbcs w29, w27, w25
+sbcs wzr, w3, w4
+ngcs w9, w10
+sbcs w20, w0, wzr
+sbcs x29, x27, x25
+sbcs xzr, x3, x4
+ngcs x9, x10
+sbcs x20, x0, xzr
+ngc w3, w12
+ngc wzr, w9
+ngc w23, wzr
+ngc x29, x30
+ngc xzr, x0
+ngc x0, xzr
+ngcs w3, w12
+ngcs wzr, w9
+ngcs w23, wzr
+ngcs x29, x30
+ngcs xzr, x0
+ngcs x0, xzr
+
+#------------------------------------------------------------------------------
+# Bitfield
+#------------------------------------------------------------------------------
+
+sbfx x1, x2, #3, #2
+asr x3, x4, #63
+asr wzr, wzr, #31
+sbfx w12, w9, #0, #1
+ubfiz x4, x5, #52, #11
+ubfx xzr, x4, #0, #1
+ubfiz x4, xzr, #1, #6
+lsr x5, x6, #12
+bfi x4, x5, #52, #11
+bfxil xzr, x4, #0, #1
+bfi x4, xzr, #1, #6
+bfxil x5, x6, #12, #52
+sxtb w1, w2
+sxtb xzr, w3
+sxth w9, w10
+sxth x0, w1
+sxtw x3, w30
+uxtb w1, w2
+uxth w9, w10
+ubfx x3, x30, #0, #32
+asr w3, w2, #0
+asr w9, w10, #31
+asr x20, x21, #63
+asr w1, wzr, #3
+lsr w3, w2, #0
+lsr w9, w10, #31
+lsr x20, x21, #63
+lsr wzr, wzr, #3
+lsr w3, w2, #0
+lsl w9, w10, #31
+lsl x20, x21, #63
+lsl w1, wzr, #3
+sbfx w9, w10, #0, #1
+sbfiz x2, x3, #63, #1
+asr x19, x20, #0
+sbfiz x9, x10, #5, #59
+asr w9, w10, #0
+sbfiz w11, w12, #31, #1
+sbfiz w13, w14, #29, #3
+sbfiz xzr, xzr, #10, #11
+sbfx w9, w10, #0, #1
+asr x2, x3, #63
+asr x19, x20, #0
+asr x9, x10, #5
+asr w9, w10, #0
+asr w11, w12, #31
+asr w13, w14, #29
+sbfx xzr, xzr, #10, #11
+bfxil w9, w10, #0, #1
+bfi x2, x3, #63, #1
+bfxil x19, x20, #0, #64
+bfi x9, x10, #5, #59
+bfxil w9, w10, #0, #32
+bfi w11, w12, #31, #1
+bfi w13, w14, #29, #3
+bfi xzr, xzr, #10, #11
+bfxil w9, w10, #0, #1
+bfxil x2, x3, #63, #1
+bfxil x19, x20, #0, #64
+bfxil x9, x10, #5, #59
+bfxil w9, w10, #0, #32
+bfxil w11, w12, #31, #1
+bfxil w13, w14, #29, #3
+bfxil xzr, xzr, #10, #11
+ubfx w9, w10, #0, #1
+lsl x2, x3, #63
+lsr x19, x20, #0
+lsl x9, x10, #5
+lsr w9, w10, #0
+lsl w11, w12, #31
+lsl w13, w14, #29
+ubfiz xzr, xzr, #10, #11
+ubfx w9, w10, #0, #1
+lsr x2, x3, #63
+lsr x19, x20, #0
+lsr x9, x10, #5
+lsr w9, w10, #0
+lsr w11, w12, #31
+lsr w13, w14, #29
+ubfx xzr, xzr, #10, #11
+
+#------------------------------------------------------------------------------
+# Compare and branch (immediate)
+#------------------------------------------------------------------------------
+
+cbz w5, #4
+cbz x5, #0
+cbnz x2, #-4
+cbnz x26, #1048572
+cbz wzr, #0
+cbnz xzr, #0
+
+#------------------------------------------------------------------------------
+# Conditional branch (immediate)
+#------------------------------------------------------------------------------
+
+b.ne #4
+b.ge #1048572
+b.ge #-4
+
+#------------------------------------------------------------------------------
+# Conditional compare (immediate)
+#------------------------------------------------------------------------------
+
+ccmp w1, #31, #0, eq
+ccmp w3, #0, #15, hs
+ccmp wzr, #15, #13, hs
+ccmp x9, #31, #0, le
+ccmp x3, #0, #15, gt
+ccmp xzr, #5, #7, ne
+ccmn w1, #31, #0, eq
+ccmn w3, #0, #15, hs
+ccmn wzr, #15, #13, hs
+ccmn x9, #31, #0, le
+ccmn x3, #0, #15, gt
+ccmn xzr, #5, #7, ne
+
+#------------------------------------------------------------------------------
+# Conditional compare (register)
+#------------------------------------------------------------------------------
+
+ccmp w1, wzr, #0, eq
+ccmp w3, w0, #15, hs
+ccmp wzr, w15, #13, hs
+ccmp x9, xzr, #0, le
+ccmp x3, x0, #15, gt
+ccmp xzr, x5, #7, ne
+ccmn w1, wzr, #0, eq
+ccmn w3, w0, #15, hs
+ccmn wzr, w15, #13, hs
+ccmn x9, xzr, #0, le
+ccmn x3, x0, #15, gt
+ccmn xzr, x5, #7, ne
+
+#------------------------------------------------------------------------------
+# Conditional select
+#------------------------------------------------------------------------------
+
+csel w1, w0, w19, ne
+csel wzr, w5, w9, eq
+csel w9, wzr, w30, gt
+csel w1, w28, wzr, mi
+csel x19, x23, x29, lt
+csel xzr, x3, x4, ge
+csel x5, xzr, x6, hs
+csel x7, x8, xzr, lo
+csinc w1, w0, w19, ne
+csinc wzr, w5, w9, eq
+csinc w9, wzr, w30, gt
+csinc w1, w28, wzr, mi
+csinc x19, x23, x29, lt
+csinc xzr, x3, x4, ge
+csinc x5, xzr, x6, hs
+csinc x7, x8, xzr, lo
+csinv w1, w0, w19, ne
+csinv wzr, w5, w9, eq
+csinv w9, wzr, w30, gt
+csinv w1, w28, wzr, mi
+csinv x19, x23, x29, lt
+csinv xzr, x3, x4, ge
+csinv x5, xzr, x6, hs
+csinv x7, x8, xzr, lo
+csneg w1, w0, w19, ne
+csneg wzr, w5, w9, eq
+csneg w9, wzr, w30, gt
+csneg w1, w28, wzr, mi
+csneg x19, x23, x29, lt
+csneg xzr, x3, x4, ge
+csneg x5, xzr, x6, hs
+csneg x7, x8, xzr, lo
+cset w3, eq
+cset x9, pl
+csetm w20, ne
+csetm x30, ge
+csinc w2, wzr, wzr, al
+csinv x3, xzr, xzr, nv
+cinc w3, w5, gt
+cinc wzr, w4, le
+cset w9, lt
+cinc x3, x5, gt
+cinc xzr, x4, le
+cset x9, lt
+csinc w5, w6, w6, nv
+csinc x1, x2, x2, al
+cinv w3, w5, gt
+cinv wzr, w4, le
+csetm w9, lt
+cinv x3, x5, gt
+cinv xzr, x4, le
+csetm x9, lt
+csinv x1, x0, x0, al
+csinv w9, w8, w8, nv
+cneg w3, w5, gt
+cneg wzr, w4, le
+cneg w9, wzr, lt
+cneg x3, x5, gt
+cneg xzr, x4, le
+cneg x9, xzr, lt
+csneg x4, x8, x8, al
+csinv w9, w8, w8, nv
+
+#------------------------------------------------------------------------------
+# Data-processing (1 source)
+#------------------------------------------------------------------------------
+
+rbit w0, w7
+rbit x18, x3
+rev16 w17, w1
+rev16 x5, x2
+rev w18, w0
+rev32 x20, x1
+rev x22, x2
+clz w24, w3
+clz x26, x4
+cls w3, w5
+cls x20, x5
+
+#------------------------------------------------------------------------------
+# Data-processing (2 source)
+#------------------------------------------------------------------------------
+
+udiv w0, w7, w10
+udiv x9, x22, x4
+sdiv w12, w21, w0
+sdiv x13, x2, x1
+lsl w11, w12, w13
+lsl x14, x15, x16
+lsr w17, w18, w19
+lsr x20, x21, x22
+asr w23, w24, w25
+asr x26, x27, x28
+ror w0, w1, w2
+ror x3, x4, x5
+lsl w6, w7, w8
+lsl x9, x10, x11
+lsr w12, w13, w14
+lsr x15, x16, x17
+asr w18, w19, w20
+asr x21, x22, x23
+ror w24, w25, w26
+ror x27, x28, x29
+
+#------------------------------------------------------------------------------
+# Data-processing (3 sources)
+#------------------------------------------------------------------------------
+
+smulh x30, x29, x28
+smulh xzr, x27, x26
+umulh x30, x29, x28
+umulh x23, x30, xzr
+madd w1, w3, w7, w4
+madd wzr, w0, w9, w11
+madd w13, wzr, w4, w4
+madd w19, w30, wzr, w29
+mul w4, w5, w6
+madd x1, x3, x7, x4
+madd xzr, x0, x9, x11
+madd x13, xzr, x4, x4
+madd x19, x30, xzr, x29
+mul x4, x5, x6
+msub w1, w3, w7, w4
+msub wzr, w0, w9, w11
+msub w13, wzr, w4, w4
+msub w19, w30, wzr, w29
+mneg w4, w5, w6
+msub x1, x3, x7, x4
+msub xzr, x0, x9, x11
+msub x13, xzr, x4, x4
+msub x19, x30, xzr, x29
+mneg x4, x5, x6
+smaddl x3, w5, w2, x9
+smaddl xzr, w10, w11, x12
+smaddl x13, wzr, w14, x15
+smaddl x16, w17, wzr, x18
+smull x19, w20, w21
+smsubl x3, w5, w2, x9
+smsubl xzr, w10, w11, x12
+smsubl x13, wzr, w14, x15
+smsubl x16, w17, wzr, x18
+smnegl x19, w20, w21
+umaddl x3, w5, w2, x9
+umaddl xzr, w10, w11, x12
+umaddl x13, wzr, w14, x15
+umaddl x16, w17, wzr, x18
+umull x19, w20, w21
+umsubl x3, w5, w2, x9
+umsubl x16, w17, wzr, x18
+umnegl x19, w20, w21
+smulh x30, x29, x28
+smulh x23, x22, xzr
+umulh x23, x22, xzr
+mul x19, x20, xzr
+mneg w21, w22, w23
+smull x11, w13, w17
+umull x11, w13, w17
+smnegl x11, w13, w17
+umnegl x11, w13, w17
+
+#------------------------------------------------------------------------------
+# Extract (immediate)
+#------------------------------------------------------------------------------
+
+extr w3, w5, w7, #0
+extr w11, w13, w17, #31
+extr x3, x5, x7, #15
+extr x11, x13, x17, #63
+ror x19, x23, #24
+ror x29, xzr, #63
+ror w9, w13, #31
+
+#------------------------------------------------------------------------------
+# Floating-point compare
+#------------------------------------------------------------------------------
+
+fcmp s3, s5
+fcmp s31, #0.0
+fcmp s31, #0.0
+fcmpe s29, s30
+fcmpe s15, #0.0
+fcmpe s15, #0.0
+fcmp d4, d12
+fcmp d23, #0.0
+fcmp d23, #0.0
+fcmpe d26, d22
+fcmpe d29, #0.0
+fcmpe d29, #0.0
+
+#------------------------------------------------------------------------------
+# Floating-point conditional compare
+#------------------------------------------------------------------------------
+
+fccmp s1, s31, #0, eq
+fccmp s3, s0, #15, hs
+fccmp s31, s15, #13, hs
+fccmp d9, d31, #0, le
+fccmp d3, d0, #15, gt
+fccmp d31, d5, #7, ne
+fccmpe s1, s31, #0, eq
+fccmpe s3, s0, #15, hs
+fccmpe s31, s15, #13, hs
+fccmpe d9, d31, #0, le
+fccmpe d3, d0, #15, gt
+fccmpe d31, d5, #7, ne
+
+#-------------------------------------------------------------------------------
+# Floating-point conditional select
+#-------------------------------------------------------------------------------
+
+fcsel s3, s20, s9, pl
+fcsel d9, d10, d11, mi
+
+#------------------------------------------------------------------------------
+# Floating-point data-processing (1 source)
+#------------------------------------------------------------------------------
+
+fmov s0, s1
+fabs s2, s3
+fneg s4, s5
+fsqrt s6, s7
+fcvt d8, s9
+fcvt h10, s11
+frintn s12, s13
+frintp s14, s15
+frintm s16, s17
+frintz s18, s19
+frinta s20, s21
+frintx s22, s23
+frinti s24, s25
+fmov d0, d1
+fabs d2, d3
+fneg d4, d5
+fsqrt d6, d7
+fcvt s8, d9
+fcvt h10, d11
+frintn d12, d13
+frintp d14, d15
+frintm d16, d17
+frintz d18, d19
+frinta d20, d21
+frintx d22, d23
+frinti d24, d25
+fcvt s26, h27
+fcvt d28, h29
+
+#------------------------------------------------------------------------------
+# Floating-point data-processing (2 sources)
+#------------------------------------------------------------------------------
+
+fmul s20, s19, s17
+fdiv s1, s2, s3
+fadd s4, s5, s6
+fsub s7, s8, s9
+fmax s10, s11, s12
+fmin s13, s14, s15
+fmaxnm s16, s17, s18
+fminnm s19, s20, s21
+fnmul s22, s23, s2
+fmul d20, d19, d17
+fdiv d1, d2, d3
+fadd d4, d5, d6
+fsub d7, d8, d9
+fmax d10, d11, d12
+fmin d13, d14, d15
+fmaxnm d16, d17, d18
+fminnm d19, d20, d21
+fnmul d22, d23, d24
+
+#------------------------------------------------------------------------------
+# Floating-point data-processing (3 sources)
+#------------------------------------------------------------------------------
+
+fmadd s3, s5, s6, s31
+fmadd d3, d13, d0, d23
+fmsub s3, s5, s6, s31
+fmsub d3, d13, d0, d23
+fnmadd s3, s5, s6, s31
+fnmadd d3, d13, d0, d23
+fnmsub s3, s5, s6, s31
+fnmsub d3, d13, d0, d23
+
+#------------------------------------------------------------------------------
+# Floating-point <-> fixed-point conversion
+#------------------------------------------------------------------------------
+
+fcvtzs w3, h5, #1
+fcvtzs wzr, h20, #13
+fcvtzs w19, h0, #32
+fcvtzs x3, h5, #1
+fcvtzs x12, h30, #45
+fcvtzs x19, h0, #64
+fcvtzs w3, s5, #1
+fcvtzs wzr, s20, #13
+fcvtzs w19, s0, #32
+fcvtzs x3, s5, #1
+fcvtzs x12, s30, #45
+fcvtzs x19, s0, #64
+fcvtzs w3, d5, #1
+fcvtzs wzr, d20, #13
+fcvtzs w19, d0, #32
+fcvtzs x3, d5, #1
+fcvtzs x12, d30, #45
+fcvtzs x19, d0, #64
+fcvtzu w3, h5, #1
+fcvtzu wzr, h20, #13
+fcvtzu w19, h0, #32
+fcvtzu x3, h5, #1
+fcvtzu x12, h30, #45
+fcvtzu x19, h0, #64
+fcvtzu w3, s5, #1
+fcvtzu wzr, s20, #13
+fcvtzu w19, s0, #32
+fcvtzu x3, s5, #1
+fcvtzu x12, s30, #45
+fcvtzu x19, s0, #64
+fcvtzu w3, d5, #1
+fcvtzu wzr, d20, #13
+fcvtzu w19, d0, #32
+fcvtzu x3, d5, #1
+fcvtzu x12, d30, #45
+fcvtzu x19, d0, #64
+scvtf h23, w19, #1
+scvtf h31, wzr, #20
+scvtf h14, w0, #32
+scvtf h23, x19, #1
+scvtf h31, xzr, #20
+scvtf h14, x0, #64
+scvtf s23, w19, #1
+scvtf s31, wzr, #20
+scvtf s14, w0, #32
+scvtf s23, x19, #1
+scvtf s31, xzr, #20
+scvtf s14, x0, #64
+scvtf d23, w19, #1
+scvtf d31, wzr, #20
+scvtf d14, w0, #32
+scvtf d23, x19, #1
+scvtf d31, xzr, #20
+scvtf d14, x0, #64
+ucvtf h23, w19, #1
+ucvtf h31, wzr, #20
+ucvtf h14, w0, #32
+ucvtf h23, x19, #1
+ucvtf h31, xzr, #20
+ucvtf h14, x0, #64
+ucvtf s23, w19, #1
+ucvtf s31, wzr, #20
+ucvtf s14, w0, #32
+ucvtf s23, x19, #1
+ucvtf s31, xzr, #20
+ucvtf s14, x0, #64
+ucvtf d23, w19, #1
+ucvtf d31, wzr, #20
+ucvtf d14, w0, #32
+ucvtf d23, x19, #1
+ucvtf d31, xzr, #20
+ucvtf d14, x0, #64
+
+#------------------------------------------------------------------------------
+# Floating-point <-> integer conversion
+#------------------------------------------------------------------------------
+
+fcvtns w3, h31
+fcvtns xzr, h12
+fcvtnu wzr, h12
+fcvtnu x0, h0
+fcvtps wzr, h9
+fcvtps x12, h20
+fcvtpu w30, h23
+fcvtpu x29, h3
+fcvtms w2, h3
+fcvtms x4, h5
+fcvtmu w6, h7
+fcvtmu x8, h9
+fcvtzs w10, h11
+fcvtzs x12, h13
+fcvtzu w14, h15
+fcvtzu x15, h16
+scvtf h17, w18
+scvtf h19, x20
+ucvtf h21, w22
+scvtf h23, x24
+fcvtas w25, h26
+fcvtas x27, h28
+fcvtau w29, h30
+fcvtau xzr, h0
+fcvtns w3, s31
+fcvtns xzr, s12
+fcvtnu wzr, s12
+fcvtnu x0, s0
+fcvtps wzr, s9
+fcvtps x12, s20
+fcvtpu w30, s23
+fcvtpu x29, s3
+fcvtms w2, s3
+fcvtms x4, s5
+fcvtmu w6, s7
+fcvtmu x8, s9
+fcvtzs w10, s11
+fcvtzs x12, s13
+fcvtzu w14, s15
+fcvtzu x15, s16
+scvtf s17, w18
+scvtf s19, x20
+ucvtf s21, w22
+scvtf s23, x24
+fcvtas w25, s26
+fcvtas x27, s28
+fcvtau w29, s30
+fcvtau xzr, s0
+fcvtns w3, d31
+fcvtns xzr, d12
+fcvtnu wzr, d12
+fcvtnu x0, d0
+fcvtps wzr, d9
+fcvtps x12, d20
+fcvtpu w30, d23
+fcvtpu x29, d3
+fcvtms w2, d3
+fcvtms x4, d5
+fcvtmu w6, d7
+fcvtmu x8, d9
+fcvtzs w10, d11
+fcvtzs x12, d13
+fcvtzu w14, d15
+fcvtzu x15, d16
+scvtf d17, w18
+scvtf d19, x20
+ucvtf d21, w22
+ucvtf d23, x24
+fcvtas w25, d26
+fcvtas x27, d28
+fcvtau w29, d30
+fcvtau xzr, d0
+fmov w3, s9
+fmov s9, w3
+fmov x20, d31
+fmov d1, x15
+fmov x3, v12.d[1]
+fmov v1.d[1], x19
+
+#------------------------------------------------------------------------------
+# Floating-point immediate
+#------------------------------------------------------------------------------
+
+fmov s2, #0.12500000
+fmov s3, #1.00000000
+fmov d30, #16.00000000
+fmov s4, #1.06250000
+fmov d10, #1.93750000
+fmov s12, #-1.00000000
+fmov d16, #8.50000000
+
+#------------------------------------------------------------------------------
+# Load-register (literal)
+#------------------------------------------------------------------------------
+
+ldr w3, #0
+ldr x29, #4
+ldrsw xzr, #-4
+ldr s0, #8
+ldr d0, #1048572
+ldr q0, #-1048576
+prfm pldl1strm, #0
+prfm #22, #0
+
+#------------------------------------------------------------------------------
+# Load/store exclusive
+#------------------------------------------------------------------------------
+
+stxrb w18, w8, [sp]
+stxrh w24, w15, [x16]
+stxr w5, w6, [x17]
+stxr w1, x10, [x21]
+ldxrb w30, [x0]
+ldxrh w17, [x4]
+ldxr w22, [sp]
+ldxr x11, [x29]
+ldxr x11, [x29]
+ldxr x11, [x29]
+stxp w12, w11, w10, [sp]
+stxp wzr, x27, x9, [x12]
+ldxp w0, wzr, [sp]
+ldxp x17, x0, [x18]
+ldxp x17, x0, [x18]
+stlxrb w12, w22, [x0]
+stlxrh w10, w1, [x1]
+stlxr w9, w2, [x2]
+stlxr w9, x3, [sp]
+ldaxrb w8, [x4]
+ldaxrh w7, [x5]
+ldaxr w6, [sp]
+ldaxr x5, [x6]
+ldaxr x5, [x6]
+ldaxr x5, [x6]
+stlxp w4, w5, w6, [sp]
+stlxp wzr, x6, x7, [x1]
+ldaxp w5, w18, [sp]
+ldaxp x6, x19, [x22]
+ldaxp x6, x19, [x22]
+stlrb w24, [sp]
+stlrh w25, [x30]
+stlr w26, [x29]
+stlr x27, [x28]
+stlr x27, [x28]
+stlr x27, [x28]
+ldarb w23, [sp]
+ldarh w22, [x30]
+ldar wzr, [x29]
+ldar x21, [x28]
+ldar x21, [x28]
+ldar x21, [x28]
+
+#------------------------------------------------------------------------------
+# Load/store (unscaled immediate)
+#------------------------------------------------------------------------------
+
+sturb w9, [sp]
+sturh wzr, [x12, #255]
+stur w16, [x0, #-256]
+stur x28, [x14, #1]
+ldurb w1, [x20, #255]
+ldurh w20, [x1, #255]
+ldur w12, [sp, #255]
+ldur xzr, [x12, #255]
+ldursb x9, [x7, #-256]
+ldursh x17, [x19, #-256]
+ldursw x20, [x15, #-256]
+prfum pldl2keep, [sp, #-256]
+ldursb w19, [x1, #-256]
+ldursh w15, [x21, #-256]
+stur b0, [sp, #1]
+stur h12, [x12, #-1]
+stur s15, [x0, #255]
+stur d31, [x5, #25]
+stur q9, [x5]
+ldur b3, [sp]
+ldur h5, [x4, #-256]
+ldur s7, [x12, #-1]
+ldur d11, [x19, #4]
+ldur q13, [x1, #2]
+
+#------------------------------------------------------------------------------
+# Load/store (immediate post-indexed)
+#------------------------------------------------------------------------------
+
+strb w9, [x2], #255
+strb w10, [x3], #1
+strb w10, [x3], #-256
+strh w9, [x2], #255
+strh w9, [x2], #1
+strh w10, [x3], #-256
+str w19, [sp], #255
+str w20, [x30], #1
+str w21, [x12], #-256
+str xzr, [x9], #255
+str x2, [x3], #1
+str x19, [x12], #-256
+ldrb w9, [x2], #255
+ldrb w10, [x3], #1
+ldrb w10, [x3], #-256
+ldrh w9, [x2], #255
+ldrh w9, [x2], #1
+ldrh w10, [x3], #-256
+ldr w19, [sp], #255
+ldr w20, [x30], #1
+ldr w21, [x12], #-256
+ldr xzr, [x9], #255
+ldr x2, [x3], #1
+ldr x19, [x12], #-256
+ldrsb xzr, [x9], #255
+ldrsb x2, [x3], #1
+ldrsb x19, [x12], #-256
+ldrsh xzr, [x9], #255
+ldrsh x2, [x3], #1
+ldrsh x19, [x12], #-256
+ldrsw xzr, [x9], #255
+ldrsw x2, [x3], #1
+ldrsw x19, [x12], #-256
+ldrsb wzr, [x9], #255
+ldrsb w2, [x3], #1
+ldrsb w19, [x12], #-256
+ldrsh wzr, [x9], #255
+ldrsh w2, [x3], #1
+ldrsh w19, [x12], #-256
+str b0, [x0], #255
+str b3, [x3], #1
+str b5, [sp], #-256
+str h10, [x10], #255
+str h13, [x23], #1
+str h15, [sp], #-256
+str s20, [x20], #255
+str s23, [x23], #1
+str s25, [x0], #-256
+str d20, [x20], #255
+str d23, [x23], #1
+str d25, [x0], #-256
+ldr b0, [x0], #255
+ldr b3, [x3], #1
+ldr b5, [sp], #-256
+ldr h10, [x10], #255
+ldr h13, [x23], #1
+ldr h15, [sp], #-256
+ldr s20, [x20], #255
+ldr s23, [x23], #1
+ldr s25, [x0], #-256
+ldr d20, [x20], #255
+ldr d23, [x23], #1
+ldr d25, [x0], #-256
+ldr q20, [x1], #255
+ldr q23, [x9], #1
+ldr q25, [x20], #-256
+str q10, [x1], #255
+str q22, [sp], #1
+str q21, [x20], #-256
+
+#-------------------------------------------------------------------------------
+# Load-store register (immediate pre-indexed)
+#-------------------------------------------------------------------------------
+
+ldr x3, [x4, #0]!
+strb w9, [x2, #255]!
+strb w10, [x3, #1]!
+strb w10, [x3, #-256]!
+strh w9, [x2, #255]!
+strh w9, [x2, #1]!
+strh w10, [x3, #-256]!
+str w19, [sp, #255]!
+str w20, [x30, #1]!
+str w21, [x12, #-256]!
+str xzr, [x9, #255]!
+str x2, [x3, #1]!
+str x19, [x12, #-256]!
+ldrb w9, [x2, #255]!
+ldrb w10, [x3, #1]!
+ldrb w10, [x3, #-256]!
+ldrh w9, [x2, #255]!
+ldrh w9, [x2, #1]!
+ldrh w10, [x3, #-256]!
+ldr w19, [sp, #255]!
+ldr w20, [x30, #1]!
+ldr w21, [x12, #-256]!
+ldr xzr, [x9, #255]!
+ldr x2, [x3, #1]!
+ldr x19, [x12, #-256]!
+ldrsb xzr, [x9, #255]!
+ldrsb x2, [x3, #1]!
+ldrsb x19, [x12, #-256]!
+ldrsh xzr, [x9, #255]!
+ldrsh x2, [x3, #1]!
+ldrsh x19, [x12, #-256]!
+ldrsw xzr, [x9, #255]!
+ldrsw x2, [x3, #1]!
+ldrsw x19, [x12, #-256]!
+ldrsb wzr, [x9, #255]!
+ldrsb w2, [x3, #1]!
+ldrsb w19, [x12, #-256]!
+ldrsh wzr, [x9, #255]!
+ldrsh w2, [x3, #1]!
+ldrsh w19, [x12, #-256]!
+str b0, [x0, #255]!
+str b3, [x3, #1]!
+str b5, [sp, #-256]!
+str h10, [x10, #255]!
+str h13, [x23, #1]!
+str h15, [sp, #-256]!
+str s20, [x20, #255]!
+str s23, [x23, #1]!
+str s25, [x0, #-256]!
+str d20, [x20, #255]!
+str d23, [x23, #1]!
+str d25, [x0, #-256]!
+ldr b0, [x0, #255]!
+ldr b3, [x3, #1]!
+ldr b5, [sp, #-256]!
+ldr h10, [x10, #255]!
+ldr h13, [x23, #1]!
+ldr h15, [sp, #-256]!
+ldr s20, [x20, #255]!
+ldr s23, [x23, #1]!
+ldr s25, [x0, #-256]!
+ldr d20, [x20, #255]!
+ldr d23, [x23, #1]!
+ldr d25, [x0, #-256]!
+ldr q20, [x1, #255]!
+ldr q23, [x9, #1]!
+ldr q25, [x20, #-256]!
+str q10, [x1, #255]!
+str q22, [sp, #1]!
+str q21, [x20, #-256]!
+
+#------------------------------------------------------------------------------
+# Load/store (unprivileged)
+#------------------------------------------------------------------------------
+
+sttrb w9, [sp]
+sttrh wzr, [x12, #255]
+sttr w16, [x0, #-256]
+sttr x28, [x14, #1]
+ldtrb w1, [x20, #255]
+ldtrh w20, [x1, #255]
+ldtr w12, [sp, #255]
+ldtr xzr, [x12, #255]
+ldtrsb x9, [x7, #-256]
+ldtrsh x17, [x19, #-256]
+ldtrsw x20, [x15, #-256]
+ldtrsb w19, [x1, #-256]
+ldtrsh w15, [x21, #-256]
+
+#------------------------------------------------------------------------------
+# Load/store (unsigned immediate)
+#------------------------------------------------------------------------------
+
+ldr x4, [x29]
+ldr x30, [x12, #32760]
+ldr x20, [sp, #8]
+ldr xzr, [sp]
+ldr w2, [sp]
+ldr w17, [sp, #16380]
+ldr w13, [x2, #4]
+ldrsw x2, [x5, #4]
+ldrsw x23, [sp, #16380]
+ldrh w2, [x4]
+ldrsh w23, [x6, #8190]
+ldrsh wzr, [sp, #2]
+ldrsh x29, [x2, #2]
+ldrb w26, [x3, #121]
+ldrb w12, [x2]
+ldrsb w27, [sp, #4095]
+ldrsb xzr, [x15]
+str x30, [sp]
+str w20, [x4, #16380]
+strh w17, [sp, #8190]
+strb w23, [x3, #4095]
+strb wzr, [x2]
+ldr b31, [sp, #4095]
+ldr h20, [x2, #8190]
+ldr s10, [x19, #16380]
+ldr d3, [x10, #32760]
+str q12, [sp, #65520]
+
+#------------------------------------------------------------------------------
+# Load/store (register offset)
+#------------------------------------------------------------------------------
+
+ldr h3, [sp, x5]
+ldr h9, [x27, x6]
+ldr h10, [x30, x7, lsl #1]
+str h11, [x29, x3, sxtx]
+str h12, [x28, xzr, sxtx]
+str h13, [x27, x5, sxtx #1]
+ldr h14, [x26, w6, uxtw]
+ldr h15, [x25, w7, uxtw]
+ldr h16, [x24, w8, uxtw #1]
+ldr h17, [x23, w9, sxtw]
+str h18, [x22, w10, sxtw]
+ldr h19, [x21, wzr, sxtw #1]
+ldrb w3, [sp, x5]
+ldrb w9, [x27, x6]
+ldrsb w10, [x30, x7]
+ldrb w11, [x29, x3, sxtx]
+strb w12, [x28, xzr, sxtx]
+ldrb w14, [x26, w6, uxtw]
+ldrsb w15, [x25, w7, uxtw]
+ldrb w17, [x23, w9, sxtw]
+ldrsb x18, [x22, w10, sxtw]
+ldrsh w3, [sp, x5]
+ldrsh w9, [x27, x6]
+ldrh w10, [x30, x7, lsl #1]
+strh w11, [x29, x3, sxtx]
+ldrh w12, [x28, xzr, sxtx]
+ldrsh x13, [x27, x5, sxtx #1]
+ldrh w14, [x26, w6, uxtw]
+ldrh w15, [x25, w7, uxtw]
+ldrsh w16, [x24, w8, uxtw #1]
+ldrh w17, [x23, w9, sxtw]
+ldrh w18, [x22, w10, sxtw]
+strh w19, [x21, wzr, sxtw #1]
+ldr w3, [sp, x5]
+ldr s9, [x27, x6]
+ldr w10, [x30, x7, lsl #2]
+ldr w11, [x29, x3, sxtx]
+str s12, [x28, xzr, sxtx]
+str w13, [x27, x5, sxtx #2]
+str w14, [x26, w6, uxtw]
+ldr w15, [x25, w7, uxtw]
+ldr w16, [x24, w8, uxtw #2]
+ldrsw x17, [x23, w9, sxtw]
+ldr w18, [x22, w10, sxtw]
+ldrsw x19, [x21, wzr, sxtw #2]
+ldr x3, [sp, x5]
+str x9, [x27, x6]
+ldr d10, [x30, x7, lsl #3]
+str x11, [x29, x3, sxtx]
+ldr x12, [x28, xzr, sxtx]
+ldr x13, [x27, x5, sxtx #3]
+prfm pldl1keep, [x26, w6, uxtw]
+ldr x15, [x25, w7, uxtw]
+ldr x16, [x24, w8, uxtw #3]
+ldr x17, [x23, w9, sxtw]
+ldr x18, [x22, w10, sxtw]
+str d19, [x21, wzr, sxtw #3]
+ldr q3, [sp, x5]
+ldr q9, [x27, x6]
+ldr q10, [x30, x7, lsl #4]
+str q11, [x29, x3, sxtx]
+str q12, [x28, xzr, sxtx]
+str q13, [x27, x5, sxtx #4]
+ldr q14, [x26, w6, uxtw]
+ldr q15, [x25, w7, uxtw]
+ldr q16, [x24, w8, uxtw #4]
+ldr q17, [x23, w9, sxtw]
+str q18, [x22, w10, sxtw]
+ldr q19, [x21, wzr, sxtw #4]
+
+#------------------------------------------------------------------------------
+# Load/store register pair (offset)
+#------------------------------------------------------------------------------
+
+ldp w3, w5, [sp]
+stp wzr, w9, [sp, #252]
+ldp w2, wzr, [sp, #-256]
+ldp w9, w10, [sp, #4]
+ldpsw x9, x10, [sp, #4]
+ldpsw x9, x10, [x2, #-256]
+ldpsw x20, x30, [sp, #252]
+ldp x21, x29, [x2, #504]
+ldp x22, x23, [x3, #-512]
+ldp x24, x25, [x4, #8]
+ldp s29, s28, [sp, #252]
+stp s27, s26, [sp, #-256]
+ldp s1, s2, [x3, #44]
+stp d3, d5, [x9, #504]
+stp d7, d11, [x10, #-512]
+ldp d2, d3, [x30, #-8]
+stp q3, q5, [sp]
+stp q17, q19, [sp, #1008]
+ldp q23, q29, [x1, #-1024]
+
+#------------------------------------------------------------------------------
+# Load/store register pair (post-indexed)
+#------------------------------------------------------------------------------
+
+ldp w3, w5, [sp], #0
+stp wzr, w9, [sp], #252
+ldp w2, wzr, [sp], #-256
+ldp w9, w10, [sp], #4
+ldpsw x9, x10, [sp], #4
+ldpsw x9, x10, [x2], #-256
+ldpsw x20, x30, [sp], #252
+ldp x21, x29, [x2], #504
+ldp x22, x23, [x3], #-512
+ldp x24, x25, [x4], #8
+ldp s29, s28, [sp], #252
+stp s27, s26, [sp], #-256
+ldp s1, s2, [x3], #44
+stp d3, d5, [x9], #504
+stp d7, d11, [x10], #-512
+ldp d2, d3, [x30], #-8
+stp q3, q5, [sp], #0
+stp q17, q19, [sp], #1008
+ldp q23, q29, [x1], #-1024
+
+#------------------------------------------------------------------------------
+# Load/store register pair (pre-indexed)
+#------------------------------------------------------------------------------
+
+ldp w3, w5, [sp, #0]!
+stp wzr, w9, [sp, #252]!
+ldp w2, wzr, [sp, #-256]!
+ldp w9, w10, [sp, #4]!
+ldpsw x9, x10, [sp, #4]!
+ldpsw x9, x10, [x2, #-256]!
+ldpsw x20, x30, [sp, #252]!
+ldp x21, x29, [x2, #504]!
+ldp x22, x23, [x3, #-512]!
+ldp x24, x25, [x4, #8]!
+ldp s29, s28, [sp, #252]!
+stp s27, s26, [sp, #-256]!
+ldp s1, s2, [x3, #44]!
+stp d3, d5, [x9, #504]!
+stp d7, d11, [x10, #-512]!
+ldp d2, d3, [x30, #-8]!
+stp q3, q5, [sp, #0]!
+stp q17, q19, [sp, #1008]!
+ldp q23, q29, [x1, #-1024]!
+
+#------------------------------------------------------------------------------
+# Load/store non-temporal register pair (offset)
+#------------------------------------------------------------------------------
+
+ldnp w3, w5, [sp]
+stnp wzr, w9, [sp, #252]
+ldnp w2, wzr, [sp, #-256]
+ldnp w9, w10, [sp, #4]
+ldnp x21, x29, [x2, #504]
+ldnp x22, x23, [x3, #-512]
+ldnp x24, x25, [x4, #8]
+ldnp s29, s28, [sp, #252]
+stnp s27, s26, [sp, #-256]
+ldnp s1, s2, [x3, #44]
+stnp d3, d5, [x9, #504]
+stnp d7, d11, [x10, #-512]
+ldnp d2, d3, [x30, #-8]
+stnp q3, q5, [sp]
+stnp q17, q19, [sp, #1008]
+ldnp q23, q29, [x1, #-1024]
+
+#------------------------------------------------------------------------------
+# Logical (immediate)
+#------------------------------------------------------------------------------
+
+mov w3, #983055
+mov x10, #-6148914691236517206
+ands w4, w4, #983055
+ands x11, x11, #-6148914691236517206
+
+#------------------------------------------------------------------------------
+# Logical (shifted register)
+#------------------------------------------------------------------------------
+
+and w12, w23, w21
+and w16, w15, w1, lsl #1
+and w9, w4, w10, lsl #31
+and w3, w30, w11
+and x3, x5, x7, lsl #63
+and x5, x14, x19, asr #4
+and w3, w17, w19, ror #31
+and w0, w2, wzr, lsr #17
+and w3, w30, w11, asr #2
+and xzr, x4, x26
+and w3, wzr, w20, ror #2
+and x7, x20, xzr, asr #63
+bic x13, x20, x14, lsl #47
+bic w2, w7, w9
+orr w2, w7, w0, asr #31
+orr x8, x9, x10, lsl #12
+orn x3, x5, x7, asr #2
+orn w2, w5, w29
+ands w7, wzr, w9, lsl #1
+ands x3, x5, x20, ror #63
+bics w3, w5, w7
+bics x3, xzr, x3, lsl #1
+tst w3, w7, lsl #31
+tst x2, x20, asr #2
+mov x3, x6
+mov x3, xzr
+mov wzr, w2
+mov w3, w5
+
+#------------------------------------------------------------------------------
+# Move wide (immediate)
+#------------------------------------------------------------------------------
+
+movz w2, #0, lsl #16
+mov w2, #-1235
+mov x2, #5299989643264
+mov x2, #0
+movk w3, #0
+movz x4, #0, lsl #16
+movk w5, #0, lsl #16
+movz x6, #0, lsl #32
+movk x7, #0, lsl #32
+movz x8, #0, lsl #48
+movk x9, #0, lsl #48
+
+#------------------------------------------------------------------------------
+# PC-relative addressing
+#------------------------------------------------------------------------------
+
+adr x2, #1600
+adrp x21, #6553600
+adr x0, #262144
+
+#------------------------------------------------------------------------------
+# Test and branch (immediate)
+#------------------------------------------------------------------------------
+
+tbz x12, #62, #0
+tbz x12, #62, #4
+tbz x12, #62, #-32768
+tbnz x12, #60, #32764
+
+#------------------------------------------------------------------------------
+# Unconditional branch (immediate)
+#------------------------------------------------------------------------------
+
+b #4
+b #-4
+b #134217724
+
+#------------------------------------------------------------------------------
+# Unconditional branch (register)
+#------------------------------------------------------------------------------
+
+br x20
+blr xzr
+ret x10
+ret
+eret
+drps
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.13 add w2, w3, #4095
+# CHECK-NEXT: 1 1 0.13 add w30, w29, #1, lsl #12
+# CHECK-NEXT: 1 1 0.13 add w13, w5, #4095, lsl #12
+# CHECK-NEXT: 1 1 0.13 add x5, x7, #1638
+# CHECK-NEXT: 1 1 0.13 add w20, wsp, #801
+# CHECK-NEXT: 1 1 0.13 add wsp, wsp, #1104
+# CHECK-NEXT: 1 1 0.13 add wsp, w30, #4084
+# CHECK-NEXT: 1 1 0.13 add x0, x24, #291
+# CHECK-NEXT: 1 1 0.13 add x3, x24, #4095, lsl #12
+# CHECK-NEXT: 1 1 0.13 add x8, sp, #1074
+# CHECK-NEXT: 1 1 0.13 add sp, x29, #3816
+# CHECK-NEXT: 1 1 0.13 sub w0, wsp, #4077
+# CHECK-NEXT: 1 1 0.13 sub w4, w20, #546, lsl #12
+# CHECK-NEXT: 1 1 0.13 sub sp, sp, #288
+# CHECK-NEXT: 1 1 0.13 sub wsp, w19, #16
+# CHECK-NEXT: 1 1 0.25 adds w13, w23, #291, lsl #12
+# CHECK-NEXT: 1 1 0.25 cmn w2, #4095
+# CHECK-NEXT: 1 1 0.25 adds w20, wsp, #0
+# CHECK-NEXT: 1 1 0.25 cmn x3, #1, lsl #12
+# CHECK-NEXT: 1 1 0.25 cmp sp, #20, lsl #12
+# CHECK-NEXT: 1 1 0.25 cmp x30, #4095
+# CHECK-NEXT: 1 1 0.25 subs x4, sp, #3822
+# CHECK-NEXT: 1 1 0.25 cmn w3, #291, lsl #12
+# CHECK-NEXT: 1 1 0.25 cmn wsp, #1365
+# CHECK-NEXT: 1 1 0.25 cmn sp, #1092, lsl #12
+# CHECK-NEXT: 1 1 0.13 mov sp, x30
+# CHECK-NEXT: 1 1 0.13 mov wsp, w20
+# CHECK-NEXT: 1 1 0.13 mov x11, sp
+# CHECK-NEXT: 1 1 0.13 mov w24, wsp
+# CHECK-NEXT: 1 1 0.13 add w3, w5, w7
+# CHECK-NEXT: 1 1 0.13 add wzr, w3, w5
+# CHECK-NEXT: 1 1 0.13 add w20, wzr, w4
+# CHECK-NEXT: 1 1 0.13 add w4, w6, wzr
+# CHECK-NEXT: 1 1 0.13 add w11, w13, w15
+# CHECK-NEXT: 1 2 0.50 add w9, w3, wzr, lsl #10
+# CHECK-NEXT: 1 2 0.50 add w17, w29, w20, lsl #31
+# CHECK-NEXT: 1 2 0.50 add w21, w22, w23, lsr #0
+# CHECK-NEXT: 1 2 0.50 add w24, w25, w26, lsr #18
+# CHECK-NEXT: 1 2 0.50 add w27, w28, w29, lsr #31
+# CHECK-NEXT: 1 2 0.50 add w2, w3, w4, asr #0
+# CHECK-NEXT: 1 2 0.50 add w5, w6, w7, asr #21
+# CHECK-NEXT: 1 2 0.50 add w8, w9, w10, asr #31
+# CHECK-NEXT: 1 1 0.13 add x3, x5, x7
+# CHECK-NEXT: 1 1 0.13 add xzr, x3, x5
+# CHECK-NEXT: 1 1 0.13 add x20, xzr, x4
+# CHECK-NEXT: 1 1 0.13 add x4, x6, xzr
+# CHECK-NEXT: 1 1 0.13 add x11, x13, x15
+# CHECK-NEXT: 1 2 0.50 add x9, x3, xzr, lsl #10
+# CHECK-NEXT: 1 2 0.50 add x17, x29, x20, lsl #63
+# CHECK-NEXT: 1 2 0.50 add x21, x22, x23, lsr #0
+# CHECK-NEXT: 1 2 0.50 add x24, x25, x26, lsr #18
+# CHECK-NEXT: 1 2 0.50 add x27, x28, x29, lsr #63
+# CHECK-NEXT: 1 2 0.50 add x2, x3, x4, asr #0
+# CHECK-NEXT: 1 2 0.50 add x5, x6, x7, asr #21
+# CHECK-NEXT: 1 2 0.50 add x8, x9, x10, asr #63
+# CHECK-NEXT: 1 1 0.25 adds w3, w5, w7
+# CHECK-NEXT: 1 1 0.25 cmn w3, w5
+# CHECK-NEXT: 1 1 0.25 adds w20, wzr, w4
+# CHECK-NEXT: 1 1 0.25 adds w4, w6, wzr
+# CHECK-NEXT: 1 1 0.25 adds w11, w13, w15
+# CHECK-NEXT: 1 2 0.50 adds w9, w3, wzr, lsl #10
+# CHECK-NEXT: 1 2 0.50 adds w17, w29, w20, lsl #31
+# CHECK-NEXT: 1 2 0.50 adds w21, w22, w23, lsr #0
+# CHECK-NEXT: 1 2 0.50 adds w24, w25, w26, lsr #18
+# CHECK-NEXT: 1 2 0.50 adds w27, w28, w29, lsr #31
+# CHECK-NEXT: 1 2 0.50 adds w2, w3, w4, asr #0
+# CHECK-NEXT: 1 2 0.50 adds w5, w6, w7, asr #21
+# CHECK-NEXT: 1 2 0.50 adds w8, w9, w10, asr #31
+# CHECK-NEXT: 1 1 0.25 adds x3, x5, x7
+# CHECK-NEXT: 1 1 0.25 cmn x3, x5
+# CHECK-NEXT: 1 1 0.25 adds x20, xzr, x4
+# CHECK-NEXT: 1 1 0.25 adds x4, x6, xzr
+# CHECK-NEXT: 1 1 0.25 adds x11, x13, x15
+# CHECK-NEXT: 1 2 0.50 adds x9, x3, xzr, lsl #10
+# CHECK-NEXT: 1 2 0.50 adds x17, x29, x20, lsl #63
+# CHECK-NEXT: 1 2 0.50 adds x21, x22, x23, lsr #0
+# CHECK-NEXT: 1 2 0.50 adds x24, x25, x26, lsr #18
+# CHECK-NEXT: 1 2 0.50 adds x27, x28, x29, lsr #63
+# CHECK-NEXT: 1 2 0.50 adds x2, x3, x4, asr #0
+# CHECK-NEXT: 1 2 0.50 adds x5, x6, x7, asr #21
+# CHECK-NEXT: 1 2 0.50 adds x8, x9, x10, asr #63
+# CHECK-NEXT: 1 1 0.13 sub w3, w5, w7
+# CHECK-NEXT: 1 1 0.13 sub wzr, w3, w5
+# CHECK-NEXT: 1 1 0.13 sub w4, w6, wzr
+# CHECK-NEXT: 1 1 0.13 sub w11, w13, w15
+# CHECK-NEXT: 1 2 0.50 sub w9, w3, wzr, lsl #10
+# CHECK-NEXT: 1 2 0.50 sub w17, w29, w20, lsl #31
+# CHECK-NEXT: 1 2 0.50 sub w21, w22, w23, lsr #0
+# CHECK-NEXT: 1 2 0.50 sub w24, w25, w26, lsr #18
+# CHECK-NEXT: 1 2 0.50 sub w27, w28, w29, lsr #31
+# CHECK-NEXT: 1 2 0.50 sub w2, w3, w4, asr #0
+# CHECK-NEXT: 1 2 0.50 sub w5, w6, w7, asr #21
+# CHECK-NEXT: 1 2 0.50 sub w8, w9, w10, asr #31
+# CHECK-NEXT: 1 1 0.13 sub x3, x5, x7
+# CHECK-NEXT: 1 1 0.13 sub xzr, x3, x5
+# CHECK-NEXT: 1 1 0.13 sub x4, x6, xzr
+# CHECK-NEXT: 1 1 0.13 sub x11, x13, x15
+# CHECK-NEXT: 1 2 0.50 sub x9, x3, xzr, lsl #10
+# CHECK-NEXT: 1 2 0.50 sub x17, x29, x20, lsl #63
+# CHECK-NEXT: 1 2 0.50 sub x21, x22, x23, lsr #0
+# CHECK-NEXT: 1 2 0.50 sub x24, x25, x26, lsr #18
+# CHECK-NEXT: 1 2 0.50 sub x27, x28, x29, lsr #63
+# CHECK-NEXT: 1 2 0.50 sub x2, x3, x4, asr #0
+# CHECK-NEXT: 1 2 0.50 sub x5, x6, x7, asr #21
+# CHECK-NEXT: 1 2 0.50 sub x8, x9, x10, asr #63
+# CHECK-NEXT: 1 1 0.25 subs w3, w5, w7
+# CHECK-NEXT: 1 1 0.25 cmp w3, w5
+# CHECK-NEXT: 1 1 0.25 subs w4, w6, wzr
+# CHECK-NEXT: 1 1 0.25 subs w11, w13, w15
+# CHECK-NEXT: 1 2 0.50 subs w9, w3, wzr, lsl #10
+# CHECK-NEXT: 1 2 0.50 subs w17, w29, w20, lsl #31
+# CHECK-NEXT: 1 2 0.50 subs w21, w22, w23, lsr #0
+# CHECK-NEXT: 1 2 0.50 subs w24, w25, w26, lsr #18
+# CHECK-NEXT: 1 2 0.50 subs w27, w28, w29, lsr #31
+# CHECK-NEXT: 1 2 0.50 subs w2, w3, w4, asr #0
+# CHECK-NEXT: 1 2 0.50 subs w5, w6, w7, asr #21
+# CHECK-NEXT: 1 2 0.50 subs w8, w9, w10, asr #31
+# CHECK-NEXT: 1 1 0.25 subs x3, x5, x7
+# CHECK-NEXT: 1 1 0.25 cmp x3, x5
+# CHECK-NEXT: 1 1 0.25 subs x4, x6, xzr
+# CHECK-NEXT: 1 1 0.25 subs x11, x13, x15
+# CHECK-NEXT: 1 2 0.50 subs x9, x3, xzr, lsl #10
+# CHECK-NEXT: 1 2 0.50 subs x17, x29, x20, lsl #63
+# CHECK-NEXT: 1 2 0.50 subs x21, x22, x23, lsr #0
+# CHECK-NEXT: 1 2 0.50 subs x24, x25, x26, lsr #18
+# CHECK-NEXT: 1 2 0.50 subs x27, x28, x29, lsr #63
+# CHECK-NEXT: 1 2 0.50 subs x2, x3, x4, asr #0
+# CHECK-NEXT: 1 2 0.50 subs x5, x6, x7, asr #21
+# CHECK-NEXT: 1 2 0.50 subs x8, x9, x10, asr #63
+# CHECK-NEXT: 1 1 0.25 cmn wzr, w4
+# CHECK-NEXT: 1 1 0.25 cmn w5, wzr
+# CHECK-NEXT: 1 1 0.25 cmn w6, w7
+# CHECK-NEXT: 1 2 0.50 cmn w8, w9, lsl #15
+# CHECK-NEXT: 1 2 0.50 cmn w10, w11, lsl #31
+# CHECK-NEXT: 1 2 0.50 cmn w12, w13, lsr #0
+# CHECK-NEXT: 1 2 0.50 cmn w14, w15, lsr #21
+# CHECK-NEXT: 1 2 0.50 cmn w16, w17, lsr #31
+# CHECK-NEXT: 1 2 0.50 cmn w18, w19, asr #0
+# CHECK-NEXT: 1 2 0.50 cmn w20, w21, asr #22
+# CHECK-NEXT: 1 2 0.50 cmn w22, w23, asr #31
+# CHECK-NEXT: 1 1 0.25 cmn x0, x3
+# CHECK-NEXT: 1 1 0.25 cmn xzr, x4
+# CHECK-NEXT: 1 1 0.25 cmn x5, xzr
+# CHECK-NEXT: 1 1 0.25 cmn x6, x7
+# CHECK-NEXT: 1 2 0.50 cmn x8, x9, lsl #15
+# CHECK-NEXT: 1 2 0.50 cmn x10, x11, lsl #63
+# CHECK-NEXT: 1 2 0.50 cmn x12, x13, lsr #0
+# CHECK-NEXT: 1 2 0.50 cmn x14, x15, lsr #41
+# CHECK-NEXT: 1 2 0.50 cmn x16, x17, lsr #63
+# CHECK-NEXT: 1 2 0.50 cmn x18, x19, asr #0
+# CHECK-NEXT: 1 2 0.50 cmn x20, x21, asr #55
+# CHECK-NEXT: 1 2 0.50 cmn x22, x23, asr #63
+# CHECK-NEXT: 1 1 0.25 cmp w0, w3
+# CHECK-NEXT: 1 1 0.25 cmp wzr, w4
+# CHECK-NEXT: 1 1 0.25 cmp w5, wzr
+# CHECK-NEXT: 1 1 0.25 cmp w6, w7
+# CHECK-NEXT: 1 2 0.50 cmp w8, w9, lsl #15
+# CHECK-NEXT: 1 2 0.50 cmp w10, w11, lsl #31
+# CHECK-NEXT: 1 2 0.50 cmp w12, w13, lsr #0
+# CHECK-NEXT: 1 2 0.50 cmp w14, w15, lsr #21
+# CHECK-NEXT: 1 2 0.50 cmp w18, w19, asr #0
+# CHECK-NEXT: 1 2 0.50 cmp w20, w21, asr #22
+# CHECK-NEXT: 1 2 0.50 cmp w22, w23, asr #31
+# CHECK-NEXT: 1 1 0.25 cmp x0, x3
+# CHECK-NEXT: 1 1 0.25 cmp xzr, x4
+# CHECK-NEXT: 1 1 0.25 cmp x5, xzr
+# CHECK-NEXT: 1 1 0.25 cmp x6, x7
+# CHECK-NEXT: 1 2 0.50 cmp x8, x9, lsl #15
+# CHECK-NEXT: 1 2 0.50 cmp x10, x11, lsl #63
+# CHECK-NEXT: 1 2 0.50 cmp x12, x13, lsr #0
+# CHECK-NEXT: 1 2 0.50 cmp x14, x15, lsr #41
+# CHECK-NEXT: 1 2 0.50 cmp x16, x17, lsr #63
+# CHECK-NEXT: 1 2 0.50 cmp x18, x19, asr #0
+# CHECK-NEXT: 1 2 0.50 cmp x20, x21, asr #55
+# CHECK-NEXT: 1 2 0.50 cmp x22, x23, asr #63
+# CHECK-NEXT: 1 1 0.25 cmp wzr, w0
+# CHECK-NEXT: 1 1 0.25 cmp xzr, x0
+# CHECK-NEXT: 1 1 0.13 adc w29, w27, w25
+# CHECK-NEXT: 1 1 0.13 adc wzr, w3, w4
+# CHECK-NEXT: 1 1 0.13 adc w9, wzr, w10
+# CHECK-NEXT: 1 1 0.13 adc w20, w0, wzr
+# CHECK-NEXT: 1 1 0.13 adc x29, x27, x25
+# CHECK-NEXT: 1 1 0.13 adc xzr, x3, x4
+# CHECK-NEXT: 1 1 0.13 adc x9, xzr, x10
+# CHECK-NEXT: 1 1 0.13 adc x20, x0, xzr
+# CHECK-NEXT: 1 1 0.25 adcs w29, w27, w25
+# CHECK-NEXT: 1 1 0.25 adcs wzr, w3, w4
+# CHECK-NEXT: 1 1 0.25 adcs w9, wzr, w10
+# CHECK-NEXT: 1 1 0.25 adcs w20, w0, wzr
+# CHECK-NEXT: 1 1 0.25 adcs x29, x27, x25
+# CHECK-NEXT: 1 1 0.25 adcs xzr, x3, x4
+# CHECK-NEXT: 1 1 0.25 adcs x9, xzr, x10
+# CHECK-NEXT: 1 1 0.25 adcs x20, x0, xzr
+# CHECK-NEXT: 1 1 0.13 sbc w29, w27, w25
+# CHECK-NEXT: 1 1 0.13 sbc wzr, w3, w4
+# CHECK-NEXT: 1 1 0.13 ngc w9, w10
+# CHECK-NEXT: 1 1 0.13 sbc w20, w0, wzr
+# CHECK-NEXT: 1 1 0.13 sbc x29, x27, x25
+# CHECK-NEXT: 1 1 0.13 sbc xzr, x3, x4
+# CHECK-NEXT: 1 1 0.13 ngc x9, x10
+# CHECK-NEXT: 1 1 0.13 sbc x20, x0, xzr
+# CHECK-NEXT: 1 1 0.25 sbcs w29, w27, w25
+# CHECK-NEXT: 1 1 0.25 sbcs wzr, w3, w4
+# CHECK-NEXT: 1 1 0.25 ngcs w9, w10
+# CHECK-NEXT: 1 1 0.25 sbcs w20, w0, wzr
+# CHECK-NEXT: 1 1 0.25 sbcs x29, x27, x25
+# CHECK-NEXT: 1 1 0.25 sbcs xzr, x3, x4
+# CHECK-NEXT: 1 1 0.25 ngcs x9, x10
+# CHECK-NEXT: 1 1 0.25 sbcs x20, x0, xzr
+# CHECK-NEXT: 1 1 0.13 ngc w3, w12
+# CHECK-NEXT: 1 1 0.13 ngc wzr, w9
+# CHECK-NEXT: 1 1 0.13 ngc w23, wzr
+# CHECK-NEXT: 1 1 0.13 ngc x29, x30
+# CHECK-NEXT: 1 1 0.13 ngc xzr, x0
+# CHECK-NEXT: 1 1 0.13 ngc x0, xzr
+# CHECK-NEXT: 1 1 0.25 ngcs w3, w12
+# CHECK-NEXT: 1 1 0.25 ngcs wzr, w9
+# CHECK-NEXT: 1 1 0.25 ngcs w23, wzr
+# CHECK-NEXT: 1 1 0.25 ngcs x29, x30
+# CHECK-NEXT: 1 1 0.25 ngcs xzr, x0
+# CHECK-NEXT: 1 1 0.25 ngcs x0, xzr
+# CHECK-NEXT: 1 1 0.13 sbfx x1, x2, #3, #2
+# CHECK-NEXT: 1 1 0.13 asr x3, x4, #63
+# CHECK-NEXT: 1 1 0.13 asr wzr, wzr, #31
+# CHECK-NEXT: 1 1 0.13 sbfx w12, w9, #0, #1
+# CHECK-NEXT: 1 1 0.13 ubfiz x4, x5, #52, #11
+# CHECK-NEXT: 1 1 0.13 ubfx xzr, x4, #0, #1
+# CHECK-NEXT: 1 1 0.13 ubfiz x4, xzr, #1, #6
+# CHECK-NEXT: 1 1 0.13 lsr x5, x6, #12
+# CHECK-NEXT: 1 2 0.50 bfi x4, x5, #52, #11
+# CHECK-NEXT: 1 2 0.50 bfxil xzr, x4, #0, #1
+# CHECK-NEXT: 1 2 0.50 bfc x4, #1, #6
+# CHECK-NEXT: 1 2 0.50 bfxil x5, x6, #12, #52
+# CHECK-NEXT: 1 1 0.13 sxtb w1, w2
+# CHECK-NEXT: 1 1 0.13 sxtb xzr, w3
+# CHECK-NEXT: 1 1 0.13 sxth w9, w10
+# CHECK-NEXT: 1 1 0.13 sxth x0, w1
+# CHECK-NEXT: 1 1 0.13 sxtw x3, w30
+# CHECK-NEXT: 1 1 0.13 uxtb w1, w2
+# CHECK-NEXT: 1 1 0.13 uxth w9, w10
+# CHECK-NEXT: 1 1 0.13 ubfx x3, x30, #0, #32
+# CHECK-NEXT: 1 1 0.13 asr w3, w2, #0
+# CHECK-NEXT: 1 1 0.13 asr w9, w10, #31
+# CHECK-NEXT: 1 1 0.13 asr x20, x21, #63
+# CHECK-NEXT: 1 1 0.13 asr w1, wzr, #3
+# CHECK-NEXT: 1 1 0.13 lsr w3, w2, #0
+# CHECK-NEXT: 1 1 0.13 lsr w9, w10, #31
+# CHECK-NEXT: 1 1 0.13 lsr x20, x21, #63
+# CHECK-NEXT: 1 1 0.13 lsr wzr, wzr, #3
+# CHECK-NEXT: 1 1 0.13 lsr w3, w2, #0
+# CHECK-NEXT: 1 1 0.13 lsl w9, w10, #31
+# CHECK-NEXT: 1 1 0.13 lsl x20, x21, #63
+# CHECK-NEXT: 1 1 0.13 lsl w1, wzr, #3
+# CHECK-NEXT: 1 1 0.13 sbfx w9, w10, #0, #1
+# CHECK-NEXT: 1 1 0.13 sbfiz x2, x3, #63, #1
+# CHECK-NEXT: 1 1 0.13 asr x19, x20, #0
+# CHECK-NEXT: 1 1 0.13 sbfiz x9, x10, #5, #59
+# CHECK-NEXT: 1 1 0.13 asr w9, w10, #0
+# CHECK-NEXT: 1 1 0.13 sbfiz w11, w12, #31, #1
+# CHECK-NEXT: 1 1 0.13 sbfiz w13, w14, #29, #3
+# CHECK-NEXT: 1 1 0.13 sbfiz xzr, xzr, #10, #11
+# CHECK-NEXT: 1 1 0.13 sbfx w9, w10, #0, #1
+# CHECK-NEXT: 1 1 0.13 asr x2, x3, #63
+# CHECK-NEXT: 1 1 0.13 asr x19, x20, #0
+# CHECK-NEXT: 1 1 0.13 asr x9, x10, #5
+# CHECK-NEXT: 1 1 0.13 asr w9, w10, #0
+# CHECK-NEXT: 1 1 0.13 asr w11, w12, #31
+# CHECK-NEXT: 1 1 0.13 asr w13, w14, #29
+# CHECK-NEXT: 1 1 0.13 sbfx xzr, xzr, #10, #11
+# CHECK-NEXT: 1 2 0.50 bfxil w9, w10, #0, #1
+# CHECK-NEXT: 1 2 0.50 bfi x2, x3, #63, #1
+# CHECK-NEXT: 1 2 0.50 bfxil x19, x20, #0, #64
+# CHECK-NEXT: 1 2 0.50 bfi x9, x10, #5, #59
+# CHECK-NEXT: 1 2 0.50 bfxil w9, w10, #0, #32
+# CHECK-NEXT: 1 2 0.50 bfi w11, w12, #31, #1
+# CHECK-NEXT: 1 2 0.50 bfi w13, w14, #29, #3
+# CHECK-NEXT: 1 2 0.50 bfc xzr, #10, #11
+# CHECK-NEXT: 1 2 0.50 bfxil w9, w10, #0, #1
+# CHECK-NEXT: 1 2 0.50 bfxil x2, x3, #63, #1
+# CHECK-NEXT: 1 2 0.50 bfxil x19, x20, #0, #64
+# CHECK-NEXT: 1 2 0.50 bfxil x9, x10, #5, #59
+# CHECK-NEXT: 1 2 0.50 bfxil w9, w10, #0, #32
+# CHECK-NEXT: 1 2 0.50 bfxil w11, w12, #31, #1
+# CHECK-NEXT: 1 2 0.50 bfxil w13, w14, #29, #3
+# CHECK-NEXT: 1 2 0.50 bfxil xzr, xzr, #10, #11
+# CHECK-NEXT: 1 1 0.13 ubfx w9, w10, #0, #1
+# CHECK-NEXT: 1 1 0.13 lsl x2, x3, #63
+# CHECK-NEXT: 1 1 0.13 lsr x19, x20, #0
+# CHECK-NEXT: 1 1 0.13 lsl x9, x10, #5
+# CHECK-NEXT: 1 1 0.13 lsr w9, w10, #0
+# CHECK-NEXT: 1 1 0.13 lsl w11, w12, #31
+# CHECK-NEXT: 1 1 0.13 lsl w13, w14, #29
+# CHECK-NEXT: 1 1 0.13 ubfiz xzr, xzr, #10, #11
+# CHECK-NEXT: 1 1 0.13 ubfx w9, w10, #0, #1
+# CHECK-NEXT: 1 1 0.13 lsr x2, x3, #63
+# CHECK-NEXT: 1 1 0.13 lsr x19, x20, #0
+# CHECK-NEXT: 1 1 0.13 lsr x9, x10, #5
+# CHECK-NEXT: 1 1 0.13 lsr w9, w10, #0
+# CHECK-NEXT: 1 1 0.13 lsr w11, w12, #31
+# CHECK-NEXT: 1 1 0.13 lsr w13, w14, #29
+# CHECK-NEXT: 1 1 0.13 ubfx xzr, xzr, #10, #11
+# CHECK-NEXT: 1 1 0.33 cbz w5, #4
+# CHECK-NEXT: 1 1 0.33 cbz x5, #0
+# CHECK-NEXT: 1 1 0.33 cbnz x2, #-4
+# CHECK-NEXT: 1 1 0.33 cbnz x26, #1048572
+# CHECK-NEXT: 1 1 0.33 cbz wzr, #0
+# CHECK-NEXT: 1 1 0.33 cbnz xzr, #0
+# CHECK-NEXT: 1 1 0.33 b.ne #4
+# CHECK-NEXT: 1 1 0.33 b.ge #1048572
+# CHECK-NEXT: 1 1 0.33 b.ge #-4
+# CHECK-NEXT: 1 1 0.25 ccmp w1, #31, #0, eq
+# CHECK-NEXT: 1 1 0.25 ccmp w3, #0, #15, hs
+# CHECK-NEXT: 1 1 0.25 ccmp wzr, #15, #13, hs
+# CHECK-NEXT: 1 1 0.25 ccmp x9, #31, #0, le
+# CHECK-NEXT: 1 1 0.25 ccmp x3, #0, #15, gt
+# CHECK-NEXT: 1 1 0.25 ccmp xzr, #5, #7, ne
+# CHECK-NEXT: 1 1 0.25 ccmn w1, #31, #0, eq
+# CHECK-NEXT: 1 1 0.25 ccmn w3, #0, #15, hs
+# CHECK-NEXT: 1 1 0.25 ccmn wzr, #15, #13, hs
+# CHECK-NEXT: 1 1 0.25 ccmn x9, #31, #0, le
+# CHECK-NEXT: 1 1 0.25 ccmn x3, #0, #15, gt
+# CHECK-NEXT: 1 1 0.25 ccmn xzr, #5, #7, ne
+# CHECK-NEXT: 1 1 0.25 ccmp w1, wzr, #0, eq
+# CHECK-NEXT: 1 1 0.25 ccmp w3, w0, #15, hs
+# CHECK-NEXT: 1 1 0.25 ccmp wzr, w15, #13, hs
+# CHECK-NEXT: 1 1 0.25 ccmp x9, xzr, #0, le
+# CHECK-NEXT: 1 1 0.25 ccmp x3, x0, #15, gt
+# CHECK-NEXT: 1 1 0.25 ccmp xzr, x5, #7, ne
+# CHECK-NEXT: 1 1 0.25 ccmn w1, wzr, #0, eq
+# CHECK-NEXT: 1 1 0.25 ccmn w3, w0, #15, hs
+# CHECK-NEXT: 1 1 0.25 ccmn wzr, w15, #13, hs
+# CHECK-NEXT: 1 1 0.25 ccmn x9, xzr, #0, le
+# CHECK-NEXT: 1 1 0.25 ccmn x3, x0, #15, gt
+# CHECK-NEXT: 1 1 0.25 ccmn xzr, x5, #7, ne
+# CHECK-NEXT: 1 1 0.13 csel w1, w0, w19, ne
+# CHECK-NEXT: 1 1 0.13 csel wzr, w5, w9, eq
+# CHECK-NEXT: 1 1 0.13 csel w9, wzr, w30, gt
+# CHECK-NEXT: 1 1 0.13 csel w1, w28, wzr, mi
+# CHECK-NEXT: 1 1 0.13 csel x19, x23, x29, lt
+# CHECK-NEXT: 1 1 0.13 csel xzr, x3, x4, ge
+# CHECK-NEXT: 1 1 0.13 csel x5, xzr, x6, hs
+# CHECK-NEXT: 1 1 0.13 csel x7, x8, xzr, lo
+# CHECK-NEXT: 1 1 0.13 csinc w1, w0, w19, ne
+# CHECK-NEXT: 1 1 0.13 csinc wzr, w5, w9, eq
+# CHECK-NEXT: 1 1 0.13 csinc w9, wzr, w30, gt
+# CHECK-NEXT: 1 1 0.13 csinc w1, w28, wzr, mi
+# CHECK-NEXT: 1 1 0.13 csinc x19, x23, x29, lt
+# CHECK-NEXT: 1 1 0.13 csinc xzr, x3, x4, ge
+# CHECK-NEXT: 1 1 0.13 csinc x5, xzr, x6, hs
+# CHECK-NEXT: 1 1 0.13 csinc x7, x8, xzr, lo
+# CHECK-NEXT: 1 1 0.13 csinv w1, w0, w19, ne
+# CHECK-NEXT: 1 1 0.13 csinv wzr, w5, w9, eq
+# CHECK-NEXT: 1 1 0.13 csinv w9, wzr, w30, gt
+# CHECK-NEXT: 1 1 0.13 csinv w1, w28, wzr, mi
+# CHECK-NEXT: 1 1 0.13 csinv x19, x23, x29, lt
+# CHECK-NEXT: 1 1 0.13 csinv xzr, x3, x4, ge
+# CHECK-NEXT: 1 1 0.13 csinv x5, xzr, x6, hs
+# CHECK-NEXT: 1 1 0.13 csinv x7, x8, xzr, lo
+# CHECK-NEXT: 1 1 0.13 csneg w1, w0, w19, ne
+# CHECK-NEXT: 1 1 0.13 csneg wzr, w5, w9, eq
+# CHECK-NEXT: 1 1 0.13 csneg w9, wzr, w30, gt
+# CHECK-NEXT: 1 1 0.13 csneg w1, w28, wzr, mi
+# CHECK-NEXT: 1 1 0.13 csneg x19, x23, x29, lt
+# CHECK-NEXT: 1 1 0.13 csneg xzr, x3, x4, ge
+# CHECK-NEXT: 1 1 0.13 csneg x5, xzr, x6, hs
+# CHECK-NEXT: 1 1 0.13 csneg x7, x8, xzr, lo
+# CHECK-NEXT: 1 1 0.13 cset w3, eq
+# CHECK-NEXT: 1 1 0.13 cset x9, pl
+# CHECK-NEXT: 1 1 0.13 csetm w20, ne
+# CHECK-NEXT: 1 1 0.13 csetm x30, ge
+# CHECK-NEXT: 1 1 0.13 csinc w2, wzr, wzr, al
+# CHECK-NEXT: 1 1 0.13 csinv x3, xzr, xzr, nv
+# CHECK-NEXT: 1 1 0.13 cinc w3, w5, gt
+# CHECK-NEXT: 1 1 0.13 cinc wzr, w4, le
+# CHECK-NEXT: 1 1 0.13 cset w9, lt
+# CHECK-NEXT: 1 1 0.13 cinc x3, x5, gt
+# CHECK-NEXT: 1 1 0.13 cinc xzr, x4, le
+# CHECK-NEXT: 1 1 0.13 cset x9, lt
+# CHECK-NEXT: 1 1 0.13 csinc w5, w6, w6, nv
+# CHECK-NEXT: 1 1 0.13 csinc x1, x2, x2, al
+# CHECK-NEXT: 1 1 0.13 cinv w3, w5, gt
+# CHECK-NEXT: 1 1 0.13 cinv wzr, w4, le
+# CHECK-NEXT: 1 1 0.13 csetm w9, lt
+# CHECK-NEXT: 1 1 0.13 cinv x3, x5, gt
+# CHECK-NEXT: 1 1 0.13 cinv xzr, x4, le
+# CHECK-NEXT: 1 1 0.13 csetm x9, lt
+# CHECK-NEXT: 1 1 0.13 csinv x1, x0, x0, al
+# CHECK-NEXT: 1 1 0.13 csinv w9, w8, w8, nv
+# CHECK-NEXT: 1 1 0.13 cneg w3, w5, gt
+# CHECK-NEXT: 1 1 0.13 cneg wzr, w4, le
+# CHECK-NEXT: 1 1 0.13 cneg w9, wzr, lt
+# CHECK-NEXT: 1 1 0.13 cneg x3, x5, gt
+# CHECK-NEXT: 1 1 0.13 cneg xzr, x4, le
+# CHECK-NEXT: 1 1 0.13 cneg x9, xzr, lt
+# CHECK-NEXT: 1 1 0.13 csneg x4, x8, x8, al
+# CHECK-NEXT: 1 1 0.13 csinv w9, w8, w8, nv
+# CHECK-NEXT: 1 1 0.13 rbit w0, w7
+# CHECK-NEXT: 1 1 0.13 rbit x18, x3
+# CHECK-NEXT: 1 1 0.13 rev16 w17, w1
+# CHECK-NEXT: 1 1 0.13 rev16 x5, x2
+# CHECK-NEXT: 1 1 0.13 rev w18, w0
+# CHECK-NEXT: 1 1 0.13 rev32 x20, x1
+# CHECK-NEXT: 1 1 0.13 rev x22, x2
+# CHECK-NEXT: 1 1 0.13 clz w24, w3
+# CHECK-NEXT: 1 1 0.13 clz x26, x4
+# CHECK-NEXT: 1 1 0.13 cls w3, w5
+# CHECK-NEXT: 1 1 0.13 cls x20, x5
+# CHECK-NEXT: 1 12 12.00 udiv w0, w7, w10
+# CHECK-NEXT: 1 20 20.00 udiv x9, x22, x4
+# CHECK-NEXT: 1 12 12.00 sdiv w12, w21, w0
+# CHECK-NEXT: 1 20 20.00 sdiv x13, x2, x1
+# CHECK-NEXT: 1 1 0.13 lsl w11, w12, w13
+# CHECK-NEXT: 1 1 0.13 lsl x14, x15, x16
+# CHECK-NEXT: 1 1 0.13 lsr w17, w18, w19
+# CHECK-NEXT: 1 1 0.13 lsr x20, x21, x22
+# CHECK-NEXT: 1 1 0.13 asr w23, w24, w25
+# CHECK-NEXT: 1 1 0.13 asr x26, x27, x28
+# CHECK-NEXT: 1 1 0.13 ror w0, w1, w2
+# CHECK-NEXT: 1 1 0.13 ror x3, x4, x5
+# CHECK-NEXT: 1 1 0.13 lsl w6, w7, w8
+# CHECK-NEXT: 1 1 0.13 lsl x9, x10, x11
+# CHECK-NEXT: 1 1 0.13 lsr w12, w13, w14
+# CHECK-NEXT: 1 1 0.13 lsr x15, x16, x17
+# CHECK-NEXT: 1 1 0.13 asr w18, w19, w20
+# CHECK-NEXT: 1 1 0.13 asr x21, x22, x23
+# CHECK-NEXT: 1 1 0.13 ror w24, w25, w26
+# CHECK-NEXT: 1 1 0.13 ror x27, x28, x29
+# CHECK-NEXT: 1 3 0.50 smulh x30, x29, x28
+# CHECK-NEXT: 1 3 0.50 smulh xzr, x27, x26
+# CHECK-NEXT: 1 3 0.50 umulh x30, x29, x28
+# CHECK-NEXT: 1 3 0.50 umulh x23, x30, xzr
+# CHECK-NEXT: 1 2 0.50 madd w1, w3, w7, w4
+# CHECK-NEXT: 1 2 0.50 madd wzr, w0, w9, w11
+# CHECK-NEXT: 1 2 0.50 madd w13, wzr, w4, w4
+# CHECK-NEXT: 1 2 0.50 madd w19, w30, wzr, w29
+# CHECK-NEXT: 1 2 0.50 mul w4, w5, w6
+# CHECK-NEXT: 1 2 0.50 madd x1, x3, x7, x4
+# CHECK-NEXT: 1 2 0.50 madd xzr, x0, x9, x11
+# CHECK-NEXT: 1 2 0.50 madd x13, xzr, x4, x4
+# CHECK-NEXT: 1 2 0.50 madd x19, x30, xzr, x29
+# CHECK-NEXT: 1 2 0.50 mul x4, x5, x6
+# CHECK-NEXT: 1 2 0.50 msub w1, w3, w7, w4
+# CHECK-NEXT: 1 2 0.50 msub wzr, w0, w9, w11
+# CHECK-NEXT: 1 2 0.50 msub w13, wzr, w4, w4
+# CHECK-NEXT: 1 2 0.50 msub w19, w30, wzr, w29
+# CHECK-NEXT: 1 2 0.50 mneg w4, w5, w6
+# CHECK-NEXT: 1 2 0.50 msub x1, x3, x7, x4
+# CHECK-NEXT: 1 2 0.50 msub xzr, x0, x9, x11
+# CHECK-NEXT: 1 2 0.50 msub x13, xzr, x4, x4
+# CHECK-NEXT: 1 2 0.50 msub x19, x30, xzr, x29
+# CHECK-NEXT: 1 2 0.50 mneg x4, x5, x6
+# CHECK-NEXT: 1 2 0.50 smaddl x3, w5, w2, x9
+# CHECK-NEXT: 1 2 0.50 smaddl xzr, w10, w11, x12
+# CHECK-NEXT: 1 2 0.50 smaddl x13, wzr, w14, x15
+# CHECK-NEXT: 1 2 0.50 smaddl x16, w17, wzr, x18
+# CHECK-NEXT: 1 2 0.50 smull x19, w20, w21
+# CHECK-NEXT: 1 2 0.50 smsubl x3, w5, w2, x9
+# CHECK-NEXT: 1 2 0.50 smsubl xzr, w10, w11, x12
+# CHECK-NEXT: 1 2 0.50 smsubl x13, wzr, w14, x15
+# CHECK-NEXT: 1 2 0.50 smsubl x16, w17, wzr, x18
+# CHECK-NEXT: 1 2 0.50 smnegl x19, w20, w21
+# CHECK-NEXT: 1 2 0.50 umaddl x3, w5, w2, x9
+# CHECK-NEXT: 1 2 0.50 umaddl xzr, w10, w11, x12
+# CHECK-NEXT: 1 2 0.50 umaddl x13, wzr, w14, x15
+# CHECK-NEXT: 1 2 0.50 umaddl x16, w17, wzr, x18
+# CHECK-NEXT: 1 2 0.50 umull x19, w20, w21
+# CHECK-NEXT: 1 2 0.50 umsubl x3, w5, w2, x9
+# CHECK-NEXT: 1 2 0.50 umsubl x16, w17, wzr, x18
+# CHECK-NEXT: 1 2 0.50 umnegl x19, w20, w21
+# CHECK-NEXT: 1 3 0.50 smulh x30, x29, x28
+# CHECK-NEXT: 1 3 0.50 smulh x23, x22, xzr
+# CHECK-NEXT: 1 3 0.50 umulh x23, x22, xzr
+# CHECK-NEXT: 1 2 0.50 mul x19, x20, xzr
+# CHECK-NEXT: 1 2 0.50 mneg w21, w22, w23
+# CHECK-NEXT: 1 2 0.50 smull x11, w13, w17
+# CHECK-NEXT: 1 2 0.50 umull x11, w13, w17
+# CHECK-NEXT: 1 2 0.50 smnegl x11, w13, w17
+# CHECK-NEXT: 1 2 0.50 umnegl x11, w13, w17
+# CHECK-NEXT: 2 3 0.50 extr w3, w5, w7, #0
+# CHECK-NEXT: 2 3 0.50 extr w11, w13, w17, #31
+# CHECK-NEXT: 2 3 0.50 extr x3, x5, x7, #15
+# CHECK-NEXT: 2 3 0.50 extr x11, x13, x17, #63
+# CHECK-NEXT: 1 1 0.13 ror x19, x23, #24
+# CHECK-NEXT: 1 1 0.13 ror x29, xzr, #63
+# CHECK-NEXT: 1 1 0.13 ror w9, w13, #31
+# CHECK-NEXT: 1 2 1.00 fcmp s3, s5
+# CHECK-NEXT: 1 2 1.00 fcmp s31, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmp s31, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmpe s29, s30
+# CHECK-NEXT: 1 2 1.00 fcmpe s15, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmpe s15, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmp d4, d12
+# CHECK-NEXT: 1 2 1.00 fcmp d23, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmp d23, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmpe d26, d22
+# CHECK-NEXT: 1 2 1.00 fcmpe d29, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmpe d29, #0.0
+# CHECK-NEXT: 1 2 1.00 fccmp s1, s31, #0, eq
+# CHECK-NEXT: 1 2 1.00 fccmp s3, s0, #15, hs
+# CHECK-NEXT: 1 2 1.00 fccmp s31, s15, #13, hs
+# CHECK-NEXT: 1 2 1.00 fccmp d9, d31, #0, le
+# CHECK-NEXT: 1 2 1.00 fccmp d3, d0, #15, gt
+# CHECK-NEXT: 1 2 1.00 fccmp d31, d5, #7, ne
+# CHECK-NEXT: 1 2 1.00 fccmpe s1, s31, #0, eq
+# CHECK-NEXT: 1 2 1.00 fccmpe s3, s0, #15, hs
+# CHECK-NEXT: 1 2 1.00 fccmpe s31, s15, #13, hs
+# CHECK-NEXT: 1 2 1.00 fccmpe d9, d31, #0, le
+# CHECK-NEXT: 1 2 1.00 fccmpe d3, d0, #15, gt
+# CHECK-NEXT: 1 2 1.00 fccmpe d31, d5, #7, ne
+# CHECK-NEXT: 1 2 0.50 fcsel s3, s20, s9, pl
+# CHECK-NEXT: 1 2 0.50 fcsel d9, d10, d11, mi
+# CHECK-NEXT: 1 2 0.50 fmov s0, s1
+# CHECK-NEXT: 1 2 0.50 fabs s2, s3
+# CHECK-NEXT: 1 2 0.50 fneg s4, s5
+# CHECK-NEXT: 1 8 1.00 fsqrt s6, s7
+# CHECK-NEXT: 1 3 0.50 fcvt d8, s9
+# CHECK-NEXT: 1 3 0.50 fcvt h10, s11
+# CHECK-NEXT: 1 3 1.00 frintn s12, s13
+# CHECK-NEXT: 1 3 1.00 frintp s14, s15
+# CHECK-NEXT: 1 3 1.00 frintm s16, s17
+# CHECK-NEXT: 1 3 1.00 frintz s18, s19
+# CHECK-NEXT: 1 3 1.00 frinta s20, s21
+# CHECK-NEXT: 1 3 1.00 frintx s22, s23
+# CHECK-NEXT: 1 3 1.00 frinti s24, s25
+# CHECK-NEXT: 1 2 0.50 fmov d0, d1
+# CHECK-NEXT: 1 2 0.50 fabs d2, d3
+# CHECK-NEXT: 1 2 0.50 fneg d4, d5
+# CHECK-NEXT: 1 13 8.00 fsqrt d6, d7
+# CHECK-NEXT: 1 3 0.50 fcvt s8, d9
+# CHECK-NEXT: 1 3 0.50 fcvt h10, d11
+# CHECK-NEXT: 1 3 1.00 frintn d12, d13
+# CHECK-NEXT: 1 3 1.00 frintp d14, d15
+# CHECK-NEXT: 1 3 1.00 frintm d16, d17
+# CHECK-NEXT: 1 3 1.00 frintz d18, d19
+# CHECK-NEXT: 1 3 1.00 frinta d20, d21
+# CHECK-NEXT: 1 3 1.00 frintx d22, d23
+# CHECK-NEXT: 1 3 1.00 frinti d24, d25
+# CHECK-NEXT: 1 3 0.50 fcvt s26, h27
+# CHECK-NEXT: 1 3 0.50 fcvt d28, h29
+# CHECK-NEXT: 1 3 0.50 fmul s20, s19, s17
+# CHECK-NEXT: 1 8 1.00 fdiv s1, s2, s3
+# CHECK-NEXT: 1 2 0.50 fadd s4, s5, s6
+# CHECK-NEXT: 1 2 0.50 fsub s7, s8, s9
+# CHECK-NEXT: 1 2 0.50 fmax s10, s11, s12
+# CHECK-NEXT: 1 2 0.50 fmin s13, s14, s15
+# CHECK-NEXT: 1 2 0.50 fmaxnm s16, s17, s18
+# CHECK-NEXT: 1 2 0.50 fminnm s19, s20, s21
+# CHECK-NEXT: 1 3 0.50 fnmul s22, s23, s2
+# CHECK-NEXT: 1 3 0.50 fmul d20, d19, d17
+# CHECK-NEXT: 1 13 8.00 fdiv d1, d2, d3
+# CHECK-NEXT: 1 2 0.50 fadd d4, d5, d6
+# CHECK-NEXT: 1 2 0.50 fsub d7, d8, d9
+# CHECK-NEXT: 1 2 0.50 fmax d10, d11, d12
+# CHECK-NEXT: 1 2 0.50 fmin d13, d14, d15
+# CHECK-NEXT: 1 2 0.50 fmaxnm d16, d17, d18
+# CHECK-NEXT: 1 2 0.50 fminnm d19, d20, d21
+# CHECK-NEXT: 1 3 0.50 fnmul d22, d23, d24
+# CHECK-NEXT: 1 4 0.50 fmadd s3, s5, s6, s31
+# CHECK-NEXT: 1 4 0.50 fmadd d3, d13, d0, d23
+# CHECK-NEXT: 1 4 0.50 fmsub s3, s5, s6, s31
+# CHECK-NEXT: 1 4 0.50 fmsub d3, d13, d0, d23
+# CHECK-NEXT: 1 4 0.50 fnmadd s3, s5, s6, s31
+# CHECK-NEXT: 1 4 0.50 fnmadd d3, d13, d0, d23
+# CHECK-NEXT: 1 4 0.50 fnmsub s3, s5, s6, s31
+# CHECK-NEXT: 1 4 0.50 fnmsub d3, d13, d0, d23
+# CHECK-NEXT: 1 3 1.00 fcvtzs w3, h5, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzs wzr, h20, #13
+# CHECK-NEXT: 1 3 1.00 fcvtzs w19, h0, #32
+# CHECK-NEXT: 1 3 1.00 fcvtzs x3, h5, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzs x12, h30, #45
+# CHECK-NEXT: 1 3 1.00 fcvtzs x19, h0, #64
+# CHECK-NEXT: 1 3 1.00 fcvtzs w3, s5, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzs wzr, s20, #13
+# CHECK-NEXT: 1 3 1.00 fcvtzs w19, s0, #32
+# CHECK-NEXT: 1 3 1.00 fcvtzs x3, s5, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzs x12, s30, #45
+# CHECK-NEXT: 1 3 1.00 fcvtzs x19, s0, #64
+# CHECK-NEXT: 1 3 1.00 fcvtzs w3, d5, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzs wzr, d20, #13
+# CHECK-NEXT: 1 3 1.00 fcvtzs w19, d0, #32
+# CHECK-NEXT: 1 3 1.00 fcvtzs x3, d5, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzs x12, d30, #45
+# CHECK-NEXT: 1 3 1.00 fcvtzs x19, d0, #64
+# CHECK-NEXT: 1 3 1.00 fcvtzu w3, h5, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzu wzr, h20, #13
+# CHECK-NEXT: 1 3 1.00 fcvtzu w19, h0, #32
+# CHECK-NEXT: 1 3 1.00 fcvtzu x3, h5, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzu x12, h30, #45
+# CHECK-NEXT: 1 3 1.00 fcvtzu x19, h0, #64
+# CHECK-NEXT: 1 3 1.00 fcvtzu w3, s5, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzu wzr, s20, #13
+# CHECK-NEXT: 1 3 1.00 fcvtzu w19, s0, #32
+# CHECK-NEXT: 1 3 1.00 fcvtzu x3, s5, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzu x12, s30, #45
+# CHECK-NEXT: 1 3 1.00 fcvtzu x19, s0, #64
+# CHECK-NEXT: 1 3 1.00 fcvtzu w3, d5, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzu wzr, d20, #13
+# CHECK-NEXT: 1 3 1.00 fcvtzu w19, d0, #32
+# CHECK-NEXT: 1 3 1.00 fcvtzu x3, d5, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzu x12, d30, #45
+# CHECK-NEXT: 1 3 1.00 fcvtzu x19, d0, #64
+# CHECK-NEXT: 1 3 1.00 scvtf h23, w19, #1
+# CHECK-NEXT: 1 3 1.00 scvtf h31, wzr, #20
+# CHECK-NEXT: 1 3 1.00 scvtf h14, w0, #32
+# CHECK-NEXT: 1 3 1.00 scvtf h23, x19, #1
+# CHECK-NEXT: 1 3 1.00 scvtf h31, xzr, #20
+# CHECK-NEXT: 1 3 1.00 scvtf h14, x0, #64
+# CHECK-NEXT: 1 3 1.00 scvtf s23, w19, #1
+# CHECK-NEXT: 1 3 1.00 scvtf s31, wzr, #20
+# CHECK-NEXT: 1 3 1.00 scvtf s14, w0, #32
+# CHECK-NEXT: 1 3 1.00 scvtf s23, x19, #1
+# CHECK-NEXT: 1 3 1.00 scvtf s31, xzr, #20
+# CHECK-NEXT: 1 3 1.00 scvtf s14, x0, #64
+# CHECK-NEXT: 1 3 1.00 scvtf d23, w19, #1
+# CHECK-NEXT: 1 3 1.00 scvtf d31, wzr, #20
+# CHECK-NEXT: 1 3 1.00 scvtf d14, w0, #32
+# CHECK-NEXT: 1 3 1.00 scvtf d23, x19, #1
+# CHECK-NEXT: 1 3 1.00 scvtf d31, xzr, #20
+# CHECK-NEXT: 1 3 1.00 scvtf d14, x0, #64
+# CHECK-NEXT: 1 3 1.00 ucvtf h23, w19, #1
+# CHECK-NEXT: 1 3 1.00 ucvtf h31, wzr, #20
+# CHECK-NEXT: 1 3 1.00 ucvtf h14, w0, #32
+# CHECK-NEXT: 1 3 1.00 ucvtf h23, x19, #1
+# CHECK-NEXT: 1 3 1.00 ucvtf h31, xzr, #20
+# CHECK-NEXT: 1 3 1.00 ucvtf h14, x0, #64
+# CHECK-NEXT: 1 3 1.00 ucvtf s23, w19, #1
+# CHECK-NEXT: 1 3 1.00 ucvtf s31, wzr, #20
+# CHECK-NEXT: 1 3 1.00 ucvtf s14, w0, #32
+# CHECK-NEXT: 1 3 1.00 ucvtf s23, x19, #1
+# CHECK-NEXT: 1 3 1.00 ucvtf s31, xzr, #20
+# CHECK-NEXT: 1 3 1.00 ucvtf s14, x0, #64
+# CHECK-NEXT: 1 3 1.00 ucvtf d23, w19, #1
+# CHECK-NEXT: 1 3 1.00 ucvtf d31, wzr, #20
+# CHECK-NEXT: 1 3 1.00 ucvtf d14, w0, #32
+# CHECK-NEXT: 1 3 1.00 ucvtf d23, x19, #1
+# CHECK-NEXT: 1 3 1.00 ucvtf d31, xzr, #20
+# CHECK-NEXT: 1 3 1.00 ucvtf d14, x0, #64
+# CHECK-NEXT: 1 3 1.00 fcvtns w3, h31
+# CHECK-NEXT: 1 3 1.00 fcvtns xzr, h12
+# CHECK-NEXT: 1 3 1.00 fcvtnu wzr, h12
+# CHECK-NEXT: 1 3 1.00 fcvtnu x0, h0
+# CHECK-NEXT: 1 3 1.00 fcvtps wzr, h9
+# CHECK-NEXT: 1 3 1.00 fcvtps x12, h20
+# CHECK-NEXT: 1 3 1.00 fcvtpu w30, h23
+# CHECK-NEXT: 1 3 1.00 fcvtpu x29, h3
+# CHECK-NEXT: 1 3 1.00 fcvtms w2, h3
+# CHECK-NEXT: 1 3 1.00 fcvtms x4, h5
+# CHECK-NEXT: 1 3 1.00 fcvtmu w6, h7
+# CHECK-NEXT: 1 3 1.00 fcvtmu x8, h9
+# CHECK-NEXT: 1 3 1.00 fcvtzs w10, h11
+# CHECK-NEXT: 1 3 1.00 fcvtzs x12, h13
+# CHECK-NEXT: 1 3 1.00 fcvtzu w14, h15
+# CHECK-NEXT: 1 3 1.00 fcvtzu x15, h16
+# CHECK-NEXT: 1 3 1.00 scvtf h17, w18
+# CHECK-NEXT: 1 3 1.00 scvtf h19, x20
+# CHECK-NEXT: 1 3 1.00 ucvtf h21, w22
+# CHECK-NEXT: 1 3 1.00 scvtf h23, x24
+# CHECK-NEXT: 1 3 1.00 fcvtas w25, h26
+# CHECK-NEXT: 1 3 1.00 fcvtas x27, h28
+# CHECK-NEXT: 1 3 1.00 fcvtau w29, h30
+# CHECK-NEXT: 1 3 1.00 fcvtau xzr, h0
+# CHECK-NEXT: 1 3 1.00 fcvtns w3, s31
+# CHECK-NEXT: 1 3 1.00 fcvtns xzr, s12
+# CHECK-NEXT: 1 3 1.00 fcvtnu wzr, s12
+# CHECK-NEXT: 1 3 1.00 fcvtnu x0, s0
+# CHECK-NEXT: 1 3 1.00 fcvtps wzr, s9
+# CHECK-NEXT: 1 3 1.00 fcvtps x12, s20
+# CHECK-NEXT: 1 3 1.00 fcvtpu w30, s23
+# CHECK-NEXT: 1 3 1.00 fcvtpu x29, s3
+# CHECK-NEXT: 1 3 1.00 fcvtms w2, s3
+# CHECK-NEXT: 1 3 1.00 fcvtms x4, s5
+# CHECK-NEXT: 1 3 1.00 fcvtmu w6, s7
+# CHECK-NEXT: 1 3 1.00 fcvtmu x8, s9
+# CHECK-NEXT: 1 3 1.00 fcvtzs w10, s11
+# CHECK-NEXT: 1 3 1.00 fcvtzs x12, s13
+# CHECK-NEXT: 1 3 1.00 fcvtzu w14, s15
+# CHECK-NEXT: 1 3 1.00 fcvtzu x15, s16
+# CHECK-NEXT: 1 3 1.00 scvtf s17, w18
+# CHECK-NEXT: 1 3 1.00 scvtf s19, x20
+# CHECK-NEXT: 1 3 1.00 ucvtf s21, w22
+# CHECK-NEXT: 1 3 1.00 scvtf s23, x24
+# CHECK-NEXT: 1 3 1.00 fcvtas w25, s26
+# CHECK-NEXT: 1 3 1.00 fcvtas x27, s28
+# CHECK-NEXT: 1 3 1.00 fcvtau w29, s30
+# CHECK-NEXT: 1 3 1.00 fcvtau xzr, s0
+# CHECK-NEXT: 1 3 1.00 fcvtns w3, d31
+# CHECK-NEXT: 1 3 1.00 fcvtns xzr, d12
+# CHECK-NEXT: 1 3 1.00 fcvtnu wzr, d12
+# CHECK-NEXT: 1 3 1.00 fcvtnu x0, d0
+# CHECK-NEXT: 1 3 1.00 fcvtps wzr, d9
+# CHECK-NEXT: 1 3 1.00 fcvtps x12, d20
+# CHECK-NEXT: 1 3 1.00 fcvtpu w30, d23
+# CHECK-NEXT: 1 3 1.00 fcvtpu x29, d3
+# CHECK-NEXT: 1 3 1.00 fcvtms w2, d3
+# CHECK-NEXT: 1 3 1.00 fcvtms x4, d5
+# CHECK-NEXT: 1 3 1.00 fcvtmu w6, d7
+# CHECK-NEXT: 1 3 1.00 fcvtmu x8, d9
+# CHECK-NEXT: 1 3 1.00 fcvtzs w10, d11
+# CHECK-NEXT: 1 3 1.00 fcvtzs x12, d13
+# CHECK-NEXT: 1 3 1.00 fcvtzu w14, d15
+# CHECK-NEXT: 1 3 1.00 fcvtzu x15, d16
+# CHECK-NEXT: 1 3 1.00 scvtf d17, w18
+# CHECK-NEXT: 1 3 1.00 scvtf d19, x20
+# CHECK-NEXT: 1 3 1.00 ucvtf d21, w22
+# CHECK-NEXT: 1 3 1.00 ucvtf d23, x24
+# CHECK-NEXT: 1 3 1.00 fcvtas w25, d26
+# CHECK-NEXT: 1 3 1.00 fcvtas x27, d28
+# CHECK-NEXT: 1 3 1.00 fcvtau w29, d30
+# CHECK-NEXT: 1 3 1.00 fcvtau xzr, d0
+# CHECK-NEXT: 2 2 1.00 fmov w3, s9
+# CHECK-NEXT: 1 3 1.00 fmov s9, w3
+# CHECK-NEXT: 2 2 1.00 fmov x20, d31
+# CHECK-NEXT: 1 3 1.00 fmov d1, x15
+# CHECK-NEXT: 2 2 1.00 fmov x3, v12.d[1]
+# CHECK-NEXT: 2 5 1.00 fmov v1.d[1], x19
+# CHECK-NEXT: 1 2 0.50 fmov s2, #0.12500000
+# CHECK-NEXT: 1 2 0.50 fmov s3, #1.00000000
+# CHECK-NEXT: 1 2 0.50 fmov d30, #16.00000000
+# CHECK-NEXT: 1 2 0.50 fmov s4, #1.06250000
+# CHECK-NEXT: 1 2 0.50 fmov d10, #1.93750000
+# CHECK-NEXT: 1 2 0.50 fmov s12, #-1.00000000
+# CHECK-NEXT: 1 2 0.50 fmov d16, #8.50000000
+# CHECK-NEXT: 2 5 0.33 * ldr w3, #0
+# CHECK-NEXT: 2 5 0.33 * ldr x29, #4
+# CHECK-NEXT: 2 5 0.33 * ldrsw xzr, #-4
+# CHECK-NEXT: 2 7 0.33 * ldr s0, #8
+# CHECK-NEXT: 2 7 0.33 * ldr d0, #1048572
+# CHECK-NEXT: 2 7 0.33 * ldr q0, #-1048576
+# CHECK-NEXT: 2 5 0.33 U prfm pldl1strm, #0
+# CHECK-NEXT: 2 5 0.33 U prfm #22, #0
+# CHECK-NEXT: 3 5 0.50 * * U stxrb w18, w8, [sp]
+# CHECK-NEXT: 3 5 0.50 * * U stxrh w24, w15, [x16]
+# CHECK-NEXT: 3 5 0.50 * * U stxr w5, w6, [x17]
+# CHECK-NEXT: 3 5 0.50 * * U stxr w1, x10, [x21]
+# CHECK-NEXT: 1 4 0.33 * * U ldxrb w30, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U ldxrh w17, [x4]
+# CHECK-NEXT: 1 4 0.33 * * U ldxr w22, [sp]
+# CHECK-NEXT: 1 4 0.33 * * U ldxr x11, [x29]
+# CHECK-NEXT: 1 4 0.33 * * U ldxr x11, [x29]
+# CHECK-NEXT: 1 4 0.33 * * U ldxr x11, [x29]
+# CHECK-NEXT: 3 5 0.50 * * U stxp w12, w11, w10, [sp]
+# CHECK-NEXT: 3 5 0.50 * * U stxp wzr, x27, x9, [x12]
+# CHECK-NEXT: 2 4 0.33 * * U ldxp w0, wzr, [sp]
+# CHECK-NEXT: 2 4 0.33 * * U ldxp x17, x0, [x18]
+# CHECK-NEXT: 2 4 0.33 * * U ldxp x17, x0, [x18]
+# CHECK-NEXT: 3 5 0.50 * * U stlxrb w12, w22, [x0]
+# CHECK-NEXT: 3 5 0.50 * * U stlxrh w10, w1, [x1]
+# CHECK-NEXT: 3 5 0.50 * * U stlxr w9, w2, [x2]
+# CHECK-NEXT: 3 5 0.50 * * U stlxr w9, x3, [sp]
+# CHECK-NEXT: 1 4 0.33 * * U ldaxrb w8, [x4]
+# CHECK-NEXT: 1 4 0.33 * * U ldaxrh w7, [x5]
+# CHECK-NEXT: 1 4 0.33 * * U ldaxr w6, [sp]
+# CHECK-NEXT: 1 4 0.33 * * U ldaxr x5, [x6]
+# CHECK-NEXT: 1 4 0.33 * * U ldaxr x5, [x6]
+# CHECK-NEXT: 1 4 0.33 * * U ldaxr x5, [x6]
+# CHECK-NEXT: 3 5 0.50 * * U stlxp w4, w5, w6, [sp]
+# CHECK-NEXT: 3 5 0.50 * * U stlxp wzr, x6, x7, [x1]
+# CHECK-NEXT: 2 4 0.33 * * U ldaxp w5, w18, [sp]
+# CHECK-NEXT: 2 4 0.33 * * U ldaxp x6, x19, [x22]
+# CHECK-NEXT: 2 4 0.33 * * U ldaxp x6, x19, [x22]
+# CHECK-NEXT: 2 1 0.50 * U stlrb w24, [sp]
+# CHECK-NEXT: 2 1 0.50 * U stlrh w25, [x30]
+# CHECK-NEXT: 2 1 0.50 * U stlr w26, [x29]
+# CHECK-NEXT: 2 1 0.50 * U stlr x27, [x28]
+# CHECK-NEXT: 2 1 0.50 * U stlr x27, [x28]
+# CHECK-NEXT: 2 1 0.50 * U stlr x27, [x28]
+# CHECK-NEXT: 1 4 0.33 * U ldarb w23, [sp]
+# CHECK-NEXT: 1 4 0.33 * U ldarh w22, [x30]
+# CHECK-NEXT: 1 4 0.33 * U ldar wzr, [x29]
+# CHECK-NEXT: 1 4 0.33 * U ldar x21, [x28]
+# CHECK-NEXT: 1 4 0.33 * U ldar x21, [x28]
+# CHECK-NEXT: 1 4 0.33 * U ldar x21, [x28]
+# CHECK-NEXT: 2 1 0.50 * sturb w9, [sp]
+# CHECK-NEXT: 2 1 0.50 * sturh wzr, [x12, #255]
+# CHECK-NEXT: 2 1 0.50 * stur w16, [x0, #-256]
+# CHECK-NEXT: 2 1 0.50 * stur x28, [x14, #1]
+# CHECK-NEXT: 1 4 0.33 * ldurb w1, [x20, #255]
+# CHECK-NEXT: 1 4 0.33 * ldurh w20, [x1, #255]
+# CHECK-NEXT: 1 4 0.33 * ldur w12, [sp, #255]
+# CHECK-NEXT: 1 4 0.33 * ldur xzr, [x12, #255]
+# CHECK-NEXT: 1 4 0.33 * ldursb x9, [x7, #-256]
+# CHECK-NEXT: 1 4 0.33 * ldursh x17, [x19, #-256]
+# CHECK-NEXT: 1 4 0.33 * ldursw x20, [x15, #-256]
+# CHECK-NEXT: 1 4 0.33 U prfum pldl2keep, [sp, #-256]
+# CHECK-NEXT: 1 4 0.33 * ldursb w19, [x1, #-256]
+# CHECK-NEXT: 1 4 0.33 * ldursh w15, [x21, #-256]
+# CHECK-NEXT: 2 2 0.50 * stur b0, [sp, #1]
+# CHECK-NEXT: 2 2 0.50 * stur h12, [x12, #-1]
+# CHECK-NEXT: 2 2 0.50 * stur s15, [x0, #255]
+# CHECK-NEXT: 2 2 0.50 * stur d31, [x5, #25]
+# CHECK-NEXT: 2 2 0.50 * stur q9, [x5]
+# CHECK-NEXT: 1 6 0.33 * ldur b3, [sp]
+# CHECK-NEXT: 1 6 0.33 * ldur h5, [x4, #-256]
+# CHECK-NEXT: 1 6 0.33 * ldur s7, [x12, #-1]
+# CHECK-NEXT: 1 6 0.33 * ldur d11, [x19, #4]
+# CHECK-NEXT: 1 6 0.33 * ldur q13, [x1, #2]
+# CHECK-NEXT: 3 1 0.50 * strb w9, [x2], #255
+# CHECK-NEXT: 3 1 0.50 * strb w10, [x3], #1
+# CHECK-NEXT: 3 1 0.50 * strb w10, [x3], #-256
+# CHECK-NEXT: 3 1 0.50 * strh w9, [x2], #255
+# CHECK-NEXT: 3 1 0.50 * strh w9, [x2], #1
+# CHECK-NEXT: 3 1 0.50 * strh w10, [x3], #-256
+# CHECK-NEXT: 3 1 0.50 * str w19, [sp], #255
+# CHECK-NEXT: 3 1 0.50 * str w20, [x30], #1
+# CHECK-NEXT: 3 1 0.50 * str w21, [x12], #-256
+# CHECK-NEXT: 3 1 0.50 * str xzr, [x9], #255
+# CHECK-NEXT: 3 1 0.50 * str x2, [x3], #1
+# CHECK-NEXT: 3 1 0.50 * str x19, [x12], #-256
+# CHECK-NEXT: 2 4 0.33 * ldrb w9, [x2], #255
+# CHECK-NEXT: 2 4 0.33 * ldrb w10, [x3], #1
+# CHECK-NEXT: 2 4 0.33 * ldrb w10, [x3], #-256
+# CHECK-NEXT: 2 4 0.33 * ldrh w9, [x2], #255
+# CHECK-NEXT: 2 4 0.33 * ldrh w9, [x2], #1
+# CHECK-NEXT: 2 4 0.33 * ldrh w10, [x3], #-256
+# CHECK-NEXT: 2 4 0.33 * ldr w19, [sp], #255
+# CHECK-NEXT: 2 4 0.33 * ldr w20, [x30], #1
+# CHECK-NEXT: 2 4 0.33 * ldr w21, [x12], #-256
+# CHECK-NEXT: 2 4 0.33 * ldr xzr, [x9], #255
+# CHECK-NEXT: 2 4 0.33 * ldr x2, [x3], #1
+# CHECK-NEXT: 2 4 0.33 * ldr x19, [x12], #-256
+# CHECK-NEXT: 2 4 0.33 * ldrsb xzr, [x9], #255
+# CHECK-NEXT: 2 4 0.33 * ldrsb x2, [x3], #1
+# CHECK-NEXT: 2 4 0.33 * ldrsb x19, [x12], #-256
+# CHECK-NEXT: 2 4 0.33 * ldrsh xzr, [x9], #255
+# CHECK-NEXT: 2 4 0.33 * ldrsh x2, [x3], #1
+# CHECK-NEXT: 2 4 0.33 * ldrsh x19, [x12], #-256
+# CHECK-NEXT: 2 4 0.33 * ldrsw xzr, [x9], #255
+# CHECK-NEXT: 2 4 0.33 * ldrsw x2, [x3], #1
+# CHECK-NEXT: 2 4 0.33 * ldrsw x19, [x12], #-256
+# CHECK-NEXT: 2 4 0.33 * ldrsb wzr, [x9], #255
+# CHECK-NEXT: 2 4 0.33 * ldrsb w2, [x3], #1
+# CHECK-NEXT: 2 4 0.33 * ldrsb w19, [x12], #-256
+# CHECK-NEXT: 2 4 0.33 * ldrsh wzr, [x9], #255
+# CHECK-NEXT: 2 4 0.33 * ldrsh w2, [x3], #1
+# CHECK-NEXT: 2 4 0.33 * ldrsh w19, [x12], #-256
+# CHECK-NEXT: 4 2 0.50 * str b0, [x0], #255
+# CHECK-NEXT: 4 2 0.50 * str b3, [x3], #1
+# CHECK-NEXT: 4 2 0.50 * str b5, [sp], #-256
+# CHECK-NEXT: 4 2 0.50 * str h10, [x10], #255
+# CHECK-NEXT: 4 2 0.50 * str h13, [x23], #1
+# CHECK-NEXT: 4 2 0.50 * str h15, [sp], #-256
+# CHECK-NEXT: 4 2 0.50 * str s20, [x20], #255
+# CHECK-NEXT: 4 2 0.50 * str s23, [x23], #1
+# CHECK-NEXT: 4 2 0.50 * str s25, [x0], #-256
+# CHECK-NEXT: 4 2 0.50 * str d20, [x20], #255
+# CHECK-NEXT: 4 2 0.50 * str d23, [x23], #1
+# CHECK-NEXT: 4 2 0.50 * str d25, [x0], #-256
+# CHECK-NEXT: 3 6 0.33 * ldr b0, [x0], #255
+# CHECK-NEXT: 3 6 0.33 * ldr b3, [x3], #1
+# CHECK-NEXT: 3 6 0.33 * ldr b5, [sp], #-256
+# CHECK-NEXT: 3 6 0.33 * ldr h10, [x10], #255
+# CHECK-NEXT: 3 6 0.33 * ldr h13, [x23], #1
+# CHECK-NEXT: 3 6 0.33 * ldr h15, [sp], #-256
+# CHECK-NEXT: 3 6 0.33 * ldr s20, [x20], #255
+# CHECK-NEXT: 3 6 0.33 * ldr s23, [x23], #1
+# CHECK-NEXT: 3 6 0.33 * ldr s25, [x0], #-256
+# CHECK-NEXT: 3 6 0.33 * ldr d20, [x20], #255
+# CHECK-NEXT: 3 6 0.33 * ldr d23, [x23], #1
+# CHECK-NEXT: 3 6 0.33 * ldr d25, [x0], #-256
+# CHECK-NEXT: 3 6 0.33 * ldr q20, [x1], #255
+# CHECK-NEXT: 3 6 0.33 * ldr q23, [x9], #1
+# CHECK-NEXT: 3 6 0.33 * ldr q25, [x20], #-256
+# CHECK-NEXT: 4 2 0.50 * str q10, [x1], #255
+# CHECK-NEXT: 4 2 0.50 * str q22, [sp], #1
+# CHECK-NEXT: 4 2 0.50 * str q21, [x20], #-256
+# CHECK-NEXT: 2 4 0.33 * ldr x3, [x4, #0]!
+# CHECK-NEXT: 3 1 0.50 * strb w9, [x2, #255]!
+# CHECK-NEXT: 3 1 0.50 * strb w10, [x3, #1]!
+# CHECK-NEXT: 3 1 0.50 * strb w10, [x3, #-256]!
+# CHECK-NEXT: 3 1 0.50 * strh w9, [x2, #255]!
+# CHECK-NEXT: 3 1 0.50 * strh w9, [x2, #1]!
+# CHECK-NEXT: 3 1 0.50 * strh w10, [x3, #-256]!
+# CHECK-NEXT: 3 1 0.50 * str w19, [sp, #255]!
+# CHECK-NEXT: 3 1 0.50 * str w20, [x30, #1]!
+# CHECK-NEXT: 3 1 0.50 * str w21, [x12, #-256]!
+# CHECK-NEXT: 3 1 0.50 * str xzr, [x9, #255]!
+# CHECK-NEXT: 3 1 0.50 * str x2, [x3, #1]!
+# CHECK-NEXT: 3 1 0.50 * str x19, [x12, #-256]!
+# CHECK-NEXT: 2 4 0.33 * ldrb w9, [x2, #255]!
+# CHECK-NEXT: 2 4 0.33 * ldrb w10, [x3, #1]!
+# CHECK-NEXT: 2 4 0.33 * ldrb w10, [x3, #-256]!
+# CHECK-NEXT: 2 4 0.33 * ldrh w9, [x2, #255]!
+# CHECK-NEXT: 2 4 0.33 * ldrh w9, [x2, #1]!
+# CHECK-NEXT: 2 4 0.33 * ldrh w10, [x3, #-256]!
+# CHECK-NEXT: 2 4 0.33 * ldr w19, [sp, #255]!
+# CHECK-NEXT: 2 4 0.33 * ldr w20, [x30, #1]!
+# CHECK-NEXT: 2 4 0.33 * ldr w21, [x12, #-256]!
+# CHECK-NEXT: 2 4 0.33 * ldr xzr, [x9, #255]!
+# CHECK-NEXT: 2 4 0.33 * ldr x2, [x3, #1]!
+# CHECK-NEXT: 2 4 0.33 * ldr x19, [x12, #-256]!
+# CHECK-NEXT: 2 4 0.33 * ldrsb xzr, [x9, #255]!
+# CHECK-NEXT: 2 4 0.33 * ldrsb x2, [x3, #1]!
+# CHECK-NEXT: 2 4 0.33 * ldrsb x19, [x12, #-256]!
+# CHECK-NEXT: 2 4 0.33 * ldrsh xzr, [x9, #255]!
+# CHECK-NEXT: 2 4 0.33 * ldrsh x2, [x3, #1]!
+# CHECK-NEXT: 2 4 0.33 * ldrsh x19, [x12, #-256]!
+# CHECK-NEXT: 2 4 0.33 * ldrsw xzr, [x9, #255]!
+# CHECK-NEXT: 2 4 0.33 * ldrsw x2, [x3, #1]!
+# CHECK-NEXT: 2 4 0.33 * ldrsw x19, [x12, #-256]!
+# CHECK-NEXT: 2 4 0.33 * ldrsb wzr, [x9, #255]!
+# CHECK-NEXT: 2 4 0.33 * ldrsb w2, [x3, #1]!
+# CHECK-NEXT: 2 4 0.33 * ldrsb w19, [x12, #-256]!
+# CHECK-NEXT: 2 4 0.33 * ldrsh wzr, [x9, #255]!
+# CHECK-NEXT: 2 4 0.33 * ldrsh w2, [x3, #1]!
+# CHECK-NEXT: 2 4 0.33 * ldrsh w19, [x12, #-256]!
+# CHECK-NEXT: 4 2 0.50 * str b0, [x0, #255]!
+# CHECK-NEXT: 4 2 0.50 * str b3, [x3, #1]!
+# CHECK-NEXT: 4 2 0.50 * str b5, [sp, #-256]!
+# CHECK-NEXT: 4 2 0.50 * str h10, [x10, #255]!
+# CHECK-NEXT: 4 2 0.50 * str h13, [x23, #1]!
+# CHECK-NEXT: 4 2 0.50 * str h15, [sp, #-256]!
+# CHECK-NEXT: 4 2 0.50 * str s20, [x20, #255]!
+# CHECK-NEXT: 4 2 0.50 * str s23, [x23, #1]!
+# CHECK-NEXT: 4 2 0.50 * str s25, [x0, #-256]!
+# CHECK-NEXT: 4 2 0.50 * str d20, [x20, #255]!
+# CHECK-NEXT: 4 2 0.50 * str d23, [x23, #1]!
+# CHECK-NEXT: 4 2 0.50 * str d25, [x0, #-256]!
+# CHECK-NEXT: 3 6 0.33 * ldr b0, [x0, #255]!
+# CHECK-NEXT: 3 6 0.33 * ldr b3, [x3, #1]!
+# CHECK-NEXT: 3 6 0.33 * ldr b5, [sp, #-256]!
+# CHECK-NEXT: 3 6 0.33 * ldr h10, [x10, #255]!
+# CHECK-NEXT: 3 6 0.33 * ldr h13, [x23, #1]!
+# CHECK-NEXT: 3 6 0.33 * ldr h15, [sp, #-256]!
+# CHECK-NEXT: 3 6 0.33 * ldr s20, [x20, #255]!
+# CHECK-NEXT: 3 6 0.33 * ldr s23, [x23, #1]!
+# CHECK-NEXT: 3 6 0.33 * ldr s25, [x0, #-256]!
+# CHECK-NEXT: 3 6 0.33 * ldr d20, [x20, #255]!
+# CHECK-NEXT: 3 6 0.33 * ldr d23, [x23, #1]!
+# CHECK-NEXT: 3 6 0.33 * ldr d25, [x0, #-256]!
+# CHECK-NEXT: 3 6 0.33 * ldr q20, [x1, #255]!
+# CHECK-NEXT: 3 6 0.33 * ldr q23, [x9, #1]!
+# CHECK-NEXT: 3 6 0.33 * ldr q25, [x20, #-256]!
+# CHECK-NEXT: 4 2 0.50 * str q10, [x1, #255]!
+# CHECK-NEXT: 4 2 0.50 * str q22, [sp, #1]!
+# CHECK-NEXT: 4 2 0.50 * str q21, [x20, #-256]!
+# CHECK-NEXT: 2 1 0.50 * sttrb w9, [sp]
+# CHECK-NEXT: 2 1 0.50 * sttrh wzr, [x12, #255]
+# CHECK-NEXT: 2 1 0.50 * sttr w16, [x0, #-256]
+# CHECK-NEXT: 2 1 0.50 * sttr x28, [x14, #1]
+# CHECK-NEXT: 1 4 0.33 * ldtrb w1, [x20, #255]
+# CHECK-NEXT: 1 4 0.33 * ldtrh w20, [x1, #255]
+# CHECK-NEXT: 1 4 0.33 * ldtr w12, [sp, #255]
+# CHECK-NEXT: 1 4 0.33 * ldtr xzr, [x12, #255]
+# CHECK-NEXT: 1 4 0.33 * ldtrsb x9, [x7, #-256]
+# CHECK-NEXT: 1 4 0.33 * ldtrsh x17, [x19, #-256]
+# CHECK-NEXT: 1 4 0.33 * ldtrsw x20, [x15, #-256]
+# CHECK-NEXT: 1 4 0.33 * ldtrsb w19, [x1, #-256]
+# CHECK-NEXT: 1 4 0.33 * ldtrsh w15, [x21, #-256]
+# CHECK-NEXT: 1 4 0.33 * ldr x4, [x29]
+# CHECK-NEXT: 1 4 0.33 * ldr x30, [x12, #32760]
+# CHECK-NEXT: 1 4 0.33 * ldr x20, [sp, #8]
+# CHECK-NEXT: 1 4 0.33 * ldr xzr, [sp]
+# CHECK-NEXT: 1 4 0.33 * ldr w2, [sp]
+# CHECK-NEXT: 1 4 0.33 * ldr w17, [sp, #16380]
+# CHECK-NEXT: 1 4 0.33 * ldr w13, [x2, #4]
+# CHECK-NEXT: 1 4 0.33 * ldrsw x2, [x5, #4]
+# CHECK-NEXT: 1 4 0.33 * ldrsw x23, [sp, #16380]
+# CHECK-NEXT: 1 4 0.33 * ldrh w2, [x4]
+# CHECK-NEXT: 1 4 0.33 * ldrsh w23, [x6, #8190]
+# CHECK-NEXT: 1 4 0.33 * ldrsh wzr, [sp, #2]
+# CHECK-NEXT: 1 4 0.33 * ldrsh x29, [x2, #2]
+# CHECK-NEXT: 1 4 0.33 * ldrb w26, [x3, #121]
+# CHECK-NEXT: 1 4 0.33 * ldrb w12, [x2]
+# CHECK-NEXT: 1 4 0.33 * ldrsb w27, [sp, #4095]
+# CHECK-NEXT: 1 4 0.33 * ldrsb xzr, [x15]
+# CHECK-NEXT: 2 1 0.50 * str x30, [sp]
+# CHECK-NEXT: 2 1 0.50 * str w20, [x4, #16380]
+# CHECK-NEXT: 2 1 0.50 * strh w17, [sp, #8190]
+# CHECK-NEXT: 2 1 0.50 * strb w23, [x3, #4095]
+# CHECK-NEXT: 2 1 0.50 * strb wzr, [x2]
+# CHECK-NEXT: 1 6 0.33 * ldr b31, [sp, #4095]
+# CHECK-NEXT: 1 6 0.33 * ldr h20, [x2, #8190]
+# CHECK-NEXT: 1 6 0.33 * ldr s10, [x19, #16380]
+# CHECK-NEXT: 1 6 0.33 * ldr d3, [x10, #32760]
+# CHECK-NEXT: 2 2 0.50 * str q12, [sp, #65520]
+# CHECK-NEXT: 1 6 0.33 * ldr h3, [sp, x5]
+# CHECK-NEXT: 1 6 0.33 * ldr h9, [x27, x6]
+# CHECK-NEXT: 2 7 0.33 * ldr h10, [x30, x7, lsl #1]
+# CHECK-NEXT: 2 2 0.50 * str h11, [x29, x3, sxtx]
+# CHECK-NEXT: 2 2 0.50 * str h12, [x28, xzr, sxtx]
+# CHECK-NEXT: 3 2 0.50 * str h13, [x27, x5, sxtx #1]
+# CHECK-NEXT: 1 6 0.33 * ldr h14, [x26, w6, uxtw]
+# CHECK-NEXT: 1 6 0.33 * ldr h15, [x25, w7, uxtw]
+# CHECK-NEXT: 2 7 0.33 * ldr h16, [x24, w8, uxtw #1]
+# CHECK-NEXT: 1 6 0.33 * ldr h17, [x23, w9, sxtw]
+# CHECK-NEXT: 2 2 0.50 * str h18, [x22, w10, sxtw]
+# CHECK-NEXT: 2 7 0.33 * ldr h19, [x21, wzr, sxtw #1]
+# CHECK-NEXT: 1 4 0.33 * ldrb w3, [sp, x5]
+# CHECK-NEXT: 1 4 0.33 * ldrb w9, [x27, x6]
+# CHECK-NEXT: 1 4 0.33 * ldrsb w10, [x30, x7]
+# CHECK-NEXT: 1 4 0.33 * ldrb w11, [x29, x3, sxtx]
+# CHECK-NEXT: 2 1 0.50 * strb w12, [x28, xzr, sxtx]
+# CHECK-NEXT: 1 4 0.33 * ldrb w14, [x26, w6, uxtw]
+# CHECK-NEXT: 1 4 0.33 * ldrsb w15, [x25, w7, uxtw]
+# CHECK-NEXT: 1 4 0.33 * ldrb w17, [x23, w9, sxtw]
+# CHECK-NEXT: 1 4 0.33 * ldrsb x18, [x22, w10, sxtw]
+# CHECK-NEXT: 1 4 0.33 * ldrsh w3, [sp, x5]
+# CHECK-NEXT: 1 4 0.33 * ldrsh w9, [x27, x6]
+# CHECK-NEXT: 1 4 0.33 * ldrh w10, [x30, x7, lsl #1]
+# CHECK-NEXT: 2 1 0.50 * strh w11, [x29, x3, sxtx]
+# CHECK-NEXT: 1 4 0.33 * ldrh w12, [x28, xzr, sxtx]
+# CHECK-NEXT: 1 4 0.33 * ldrsh x13, [x27, x5, sxtx #1]
+# CHECK-NEXT: 1 4 0.33 * ldrh w14, [x26, w6, uxtw]
+# CHECK-NEXT: 1 4 0.33 * ldrh w15, [x25, w7, uxtw]
+# CHECK-NEXT: 1 4 0.33 * ldrsh w16, [x24, w8, uxtw #1]
+# CHECK-NEXT: 1 4 0.33 * ldrh w17, [x23, w9, sxtw]
+# CHECK-NEXT: 1 4 0.33 * ldrh w18, [x22, w10, sxtw]
+# CHECK-NEXT: 2 1 0.50 * strh w19, [x21, wzr, sxtw #1]
+# CHECK-NEXT: 1 4 0.33 * ldr w3, [sp, x5]
+# CHECK-NEXT: 1 6 0.33 * ldr s9, [x27, x6]
+# CHECK-NEXT: 1 4 0.33 * ldr w10, [x30, x7, lsl #2]
+# CHECK-NEXT: 1 4 0.33 * ldr w11, [x29, x3, sxtx]
+# CHECK-NEXT: 2 2 0.50 * str s12, [x28, xzr, sxtx]
+# CHECK-NEXT: 2 1 0.50 * str w13, [x27, x5, sxtx #2]
+# CHECK-NEXT: 2 1 0.50 * str w14, [x26, w6, uxtw]
+# CHECK-NEXT: 1 4 0.33 * ldr w15, [x25, w7, uxtw]
+# CHECK-NEXT: 1 4 0.33 * ldr w16, [x24, w8, uxtw #2]
+# CHECK-NEXT: 1 4 0.33 * ldrsw x17, [x23, w9, sxtw]
+# CHECK-NEXT: 1 4 0.33 * ldr w18, [x22, w10, sxtw]
+# CHECK-NEXT: 1 4 0.33 * ldrsw x19, [x21, wzr, sxtw #2]
+# CHECK-NEXT: 1 4 0.33 * ldr x3, [sp, x5]
+# CHECK-NEXT: 2 1 0.50 * str x9, [x27, x6]
+# CHECK-NEXT: 1 6 0.33 * ldr d10, [x30, x7, lsl #3]
+# CHECK-NEXT: 2 1 0.50 * str x11, [x29, x3, sxtx]
+# CHECK-NEXT: 1 4 0.33 * ldr x12, [x28, xzr, sxtx]
+# CHECK-NEXT: 1 4 0.33 * ldr x13, [x27, x5, sxtx #3]
+# CHECK-NEXT: 1 4 0.33 U prfm pldl1keep, [x26, w6, uxtw]
+# CHECK-NEXT: 1 4 0.33 * ldr x15, [x25, w7, uxtw]
+# CHECK-NEXT: 1 4 0.33 * ldr x16, [x24, w8, uxtw #3]
+# CHECK-NEXT: 1 4 0.33 * ldr x17, [x23, w9, sxtw]
+# CHECK-NEXT: 1 4 0.33 * ldr x18, [x22, w10, sxtw]
+# CHECK-NEXT: 2 2 0.50 * str d19, [x21, wzr, sxtw #3]
+# CHECK-NEXT: 1 6 0.33 * ldr q3, [sp, x5]
+# CHECK-NEXT: 1 6 0.33 * ldr q9, [x27, x6]
+# CHECK-NEXT: 2 7 0.33 * ldr q10, [x30, x7, lsl #4]
+# CHECK-NEXT: 2 2 0.50 * str q11, [x29, x3, sxtx]
+# CHECK-NEXT: 2 2 0.50 * str q12, [x28, xzr, sxtx]
+# CHECK-NEXT: 3 2 0.50 * str q13, [x27, x5, sxtx #4]
+# CHECK-NEXT: 1 6 0.33 * ldr q14, [x26, w6, uxtw]
+# CHECK-NEXT: 1 6 0.33 * ldr q15, [x25, w7, uxtw]
+# CHECK-NEXT: 2 7 0.33 * ldr q16, [x24, w8, uxtw #4]
+# CHECK-NEXT: 1 6 0.33 * ldr q17, [x23, w9, sxtw]
+# CHECK-NEXT: 2 2 0.50 * str q18, [x22, w10, sxtw]
+# CHECK-NEXT: 2 7 0.33 * ldr q19, [x21, wzr, sxtw #4]
+# CHECK-NEXT: 2 4 0.33 * ldp w3, w5, [sp]
+# CHECK-NEXT: 2 1 0.50 * stp wzr, w9, [sp, #252]
+# CHECK-NEXT: 2 4 0.33 * ldp w2, wzr, [sp, #-256]
+# CHECK-NEXT: 2 4 0.33 * ldp w9, w10, [sp, #4]
+# CHECK-NEXT: 5 5 1.00 * ldpsw x9, x10, [sp, #4]
+# CHECK-NEXT: 5 5 1.00 * ldpsw x9, x10, [x2, #-256]
+# CHECK-NEXT: 5 5 1.00 * ldpsw x20, x30, [sp, #252]
+# CHECK-NEXT: 2 4 0.33 * ldp x21, x29, [x2, #504]
+# CHECK-NEXT: 2 4 0.33 * ldp x22, x23, [x3, #-512]
+# CHECK-NEXT: 2 4 0.33 * ldp x24, x25, [x4, #8]
+# CHECK-NEXT: 2 6 0.33 * ldp s29, s28, [sp, #252]
+# CHECK-NEXT: 2 2 0.50 * stp s27, s26, [sp, #-256]
+# CHECK-NEXT: 2 6 0.33 * ldp s1, s2, [x3, #44]
+# CHECK-NEXT: 2 2 0.50 * stp d3, d5, [x9, #504]
+# CHECK-NEXT: 2 2 0.50 * stp d7, d11, [x10, #-512]
+# CHECK-NEXT: 2 6 0.33 * ldp d2, d3, [x30, #-8]
+# CHECK-NEXT: 3 2 1.00 * stp q3, q5, [sp]
+# CHECK-NEXT: 3 2 1.00 * stp q17, q19, [sp, #1008]
+# CHECK-NEXT: 3 6 0.67 * ldp q23, q29, [x1, #-1024]
+# CHECK-NEXT: 3 4 0.33 * ldp w3, w5, [sp], #0
+# CHECK-NEXT: 3 1 0.50 * stp wzr, w9, [sp], #252
+# CHECK-NEXT: 3 4 0.33 * ldp w2, wzr, [sp], #-256
+# CHECK-NEXT: 3 4 0.33 * ldp w9, w10, [sp], #4
+# CHECK-NEXT: 6 5 1.00 * ldpsw x9, x10, [sp], #4
+# CHECK-NEXT: 6 5 1.00 * ldpsw x9, x10, [x2], #-256
+# CHECK-NEXT: 6 5 1.00 * ldpsw x20, x30, [sp], #252
+# CHECK-NEXT: 3 4 0.33 * ldp x21, x29, [x2], #504
+# CHECK-NEXT: 3 4 0.33 * ldp x22, x23, [x3], #-512
+# CHECK-NEXT: 3 4 0.33 * ldp x24, x25, [x4], #8
+# CHECK-NEXT: 4 6 0.33 * ldp s29, s28, [sp], #252
+# CHECK-NEXT: 4 2 0.50 * stp s27, s26, [sp], #-256
+# CHECK-NEXT: 4 6 0.33 * ldp s1, s2, [x3], #44
+# CHECK-NEXT: 4 2 0.50 * stp d3, d5, [x9], #504
+# CHECK-NEXT: 4 2 0.50 * stp d7, d11, [x10], #-512
+# CHECK-NEXT: 4 6 0.33 * ldp d2, d3, [x30], #-8
+# CHECK-NEXT: 4 2 1.00 * stp q3, q5, [sp], #0
+# CHECK-NEXT: 4 2 1.00 * stp q17, q19, [sp], #1008
+# CHECK-NEXT: 6 6 0.67 * ldp q23, q29, [x1], #-1024
+# CHECK-NEXT: 3 4 0.33 * ldp w3, w5, [sp, #0]!
+# CHECK-NEXT: 3 1 0.50 * stp wzr, w9, [sp, #252]!
+# CHECK-NEXT: 3 4 0.33 * ldp w2, wzr, [sp, #-256]!
+# CHECK-NEXT: 3 4 0.33 * ldp w9, w10, [sp, #4]!
+# CHECK-NEXT: 6 5 1.00 * ldpsw x9, x10, [sp, #4]!
+# CHECK-NEXT: 6 5 1.00 * ldpsw x9, x10, [x2, #-256]!
+# CHECK-NEXT: 6 5 1.00 * ldpsw x20, x30, [sp, #252]!
+# CHECK-NEXT: 3 4 0.33 * ldp x21, x29, [x2, #504]!
+# CHECK-NEXT: 3 4 0.33 * ldp x22, x23, [x3, #-512]!
+# CHECK-NEXT: 3 4 0.33 * ldp x24, x25, [x4, #8]!
+# CHECK-NEXT: 4 6 0.33 * ldp s29, s28, [sp, #252]!
+# CHECK-NEXT: 4 2 0.50 * stp s27, s26, [sp, #-256]!
+# CHECK-NEXT: 4 6 0.33 * ldp s1, s2, [x3, #44]!
+# CHECK-NEXT: 4 2 0.50 * stp d3, d5, [x9, #504]!
+# CHECK-NEXT: 4 2 0.50 * stp d7, d11, [x10, #-512]!
+# CHECK-NEXT: 4 6 0.33 * ldp d2, d3, [x30, #-8]!
+# CHECK-NEXT: 5 2 1.00 * stp q3, q5, [sp, #0]!
+# CHECK-NEXT: 5 2 1.00 * stp q17, q19, [sp, #1008]!
+# CHECK-NEXT: 6 6 0.67 * ldp q23, q29, [x1, #-1024]!
+# CHECK-NEXT: 2 4 0.33 * ldnp w3, w5, [sp]
+# CHECK-NEXT: 2 1 0.50 * stnp wzr, w9, [sp, #252]
+# CHECK-NEXT: 2 4 0.33 * ldnp w2, wzr, [sp, #-256]
+# CHECK-NEXT: 2 4 0.33 * ldnp w9, w10, [sp, #4]
+# CHECK-NEXT: 2 4 0.33 * ldnp x21, x29, [x2, #504]
+# CHECK-NEXT: 2 4 0.33 * ldnp x22, x23, [x3, #-512]
+# CHECK-NEXT: 2 4 0.33 * ldnp x24, x25, [x4, #8]
+# CHECK-NEXT: 2 6 0.33 * ldnp s29, s28, [sp, #252]
+# CHECK-NEXT: 2 2 0.50 * stnp s27, s26, [sp, #-256]
+# CHECK-NEXT: 2 6 0.33 * ldnp s1, s2, [x3, #44]
+# CHECK-NEXT: 2 2 0.50 * stnp d3, d5, [x9, #504]
+# CHECK-NEXT: 2 2 0.50 * stnp d7, d11, [x10, #-512]
+# CHECK-NEXT: 2 6 0.33 * ldnp d2, d3, [x30, #-8]
+# CHECK-NEXT: 3 2 1.00 * stnp q3, q5, [sp]
+# CHECK-NEXT: 3 2 1.00 * stnp q17, q19, [sp, #1008]
+# CHECK-NEXT: 3 6 0.67 * ldnp q23, q29, [x1, #-1024]
+# CHECK-NEXT: 1 1 0.13 mov w3, #983055
+# CHECK-NEXT: 1 1 0.13 mov x10, #-6148914691236517206
+# CHECK-NEXT: 1 1 0.25 ands w4, w4, #0xf000f
+# CHECK-NEXT: 1 1 0.25 ands x11, x11, #0xaaaaaaaaaaaaaaaa
+# CHECK-NEXT: 1 1 0.13 and w12, w23, w21
+# CHECK-NEXT: 1 1 0.13 and w16, w15, w1, lsl #1
+# CHECK-NEXT: 1 1 0.13 and w9, w4, w10, lsl #31
+# CHECK-NEXT: 1 1 0.13 and w3, w30, w11
+# CHECK-NEXT: 1 1 0.13 and x3, x5, x7, lsl #63
+# CHECK-NEXT: 1 1 0.13 and x5, x14, x19, asr #4
+# CHECK-NEXT: 1 1 0.13 and w3, w17, w19, ror #31
+# CHECK-NEXT: 1 1 0.13 and w0, w2, wzr, lsr #17
+# CHECK-NEXT: 1 1 0.13 and w3, w30, w11, asr #2
+# CHECK-NEXT: 1 1 0.13 and xzr, x4, x26
+# CHECK-NEXT: 1 1 0.13 and w3, wzr, w20, ror #2
+# CHECK-NEXT: 1 1 0.13 and x7, x20, xzr, asr #63
+# CHECK-NEXT: 1 1 0.13 bic x13, x20, x14, lsl #47
+# CHECK-NEXT: 1 1 0.13 bic w2, w7, w9
+# CHECK-NEXT: 1 1 0.13 orr w2, w7, w0, asr #31
+# CHECK-NEXT: 1 1 0.13 orr x8, x9, x10, lsl #12
+# CHECK-NEXT: 1 1 0.13 orn x3, x5, x7, asr #2
+# CHECK-NEXT: 1 1 0.13 orn w2, w5, w29
+# CHECK-NEXT: 1 2 0.50 ands w7, wzr, w9, lsl #1
+# CHECK-NEXT: 1 2 0.50 ands x3, x5, x20, ror #63
+# CHECK-NEXT: 1 1 0.25 bics w3, w5, w7
+# CHECK-NEXT: 1 2 0.50 bics x3, xzr, x3, lsl #1
+# CHECK-NEXT: 1 2 0.50 tst w3, w7, lsl #31
+# CHECK-NEXT: 1 2 0.50 tst x2, x20, asr #2
+# CHECK-NEXT: 1 0 0.10 mov x3, x6
+# CHECK-NEXT: 1 0 0.10 mov x3, xzr
+# CHECK-NEXT: 1 0 0.10 mov wzr, w2
+# CHECK-NEXT: 1 0 0.10 mov w3, w5
+# CHECK-NEXT: 1 1 0.13 movz w2, #0, lsl #16
+# CHECK-NEXT: 1 1 0.13 mov w2, #-1235
+# CHECK-NEXT: 1 1 0.13 mov x2, #5299989643264
+# CHECK-NEXT: 1 0 0.10 mov x2, #0
+# CHECK-NEXT: 1 1 0.13 movk w3, #0
+# CHECK-NEXT: 1 1 0.13 movz x4, #0, lsl #16
+# CHECK-NEXT: 1 1 0.13 movk w5, #0, lsl #16
+# CHECK-NEXT: 1 1 0.13 movz x6, #0, lsl #32
+# CHECK-NEXT: 1 1 0.13 movk x7, #0, lsl #32
+# CHECK-NEXT: 1 1 0.13 movz x8, #0, lsl #48
+# CHECK-NEXT: 1 1 0.13 movk x9, #0, lsl #48
+# CHECK-NEXT: 1 1 0.13 adr x2, #1600
+# CHECK-NEXT: 1 1 0.13 adrp x21, #6553600
+# CHECK-NEXT: 1 1 0.13 adr x0, #262144
+# CHECK-NEXT: 1 1 0.33 tbz x12, #62, #0
+# CHECK-NEXT: 1 1 0.33 tbz x12, #62, #4
+# CHECK-NEXT: 1 1 0.33 tbz x12, #62, #-32768
+# CHECK-NEXT: 1 1 0.33 tbnz x12, #60, #32764
+# CHECK-NEXT: 1 1 0.33 b #4
+# CHECK-NEXT: 1 1 0.33 b #-4
+# CHECK-NEXT: 1 1 0.33 b #134217724
+# CHECK-NEXT: 1 1 0.33 br x20
+# CHECK-NEXT: 2 1 0.33 blr xzr
+# CHECK-NEXT: 1 1 0.33 U ret x10
+# CHECK-NEXT: 1 1 0.33 U ret
+# CHECK-NEXT: 1 1 0.33 U eret
+# CHECK-NEXT: 1 1 0.33 U drps
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - V3AEUnitB
+# CHECK-NEXT: [0.1] - V3AEUnitB
+# CHECK-NEXT: [0.2] - V3AEUnitB
+# CHECK-NEXT: [1.0] - V3AEUnitD
+# CHECK-NEXT: [1.1] - V3AEUnitD
+# CHECK-NEXT: [2.0] - V3AEUnitFlg
+# CHECK-NEXT: [2.1] - V3AEUnitFlg
+# CHECK-NEXT: [2.2] - V3AEUnitFlg
+# CHECK-NEXT: [2.3] - V3AEUnitFlg
+# CHECK-NEXT: [3.0] - V3AEUnitL12
+# CHECK-NEXT: [3.1] - V3AEUnitL12
+# CHECK-NEXT: [4] - V3AEUnitLS0
+# CHECK-NEXT: [5] - V3AEUnitM0
+# CHECK-NEXT: [6] - V3AEUnitM1
+# CHECK-NEXT: [7] - V3AEUnitS0
+# CHECK-NEXT: [8] - V3AEUnitS1
+# CHECK-NEXT: [9] - V3AEUnitS2
+# CHECK-NEXT: [10] - V3AEUnitS3
+# CHECK-NEXT: [11] - V3AEUnitS4
+# CHECK-NEXT: [12] - V3AEUnitS5
+# CHECK-NEXT: [13] - V3AEUnitST1
+# CHECK-NEXT: [14] - V3AEUnitV0
+# CHECK-NEXT: [15] - V3AEUnitV1
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15]
+# CHECK-NEXT: 7.33 7.33 7.33 33.00 33.00 40.50 40.50 40.50 40.50 99.00 99.00 165.00 280.25 165.25 77.92 77.92 77.92 77.92 77.92 77.92 66.00 197.00 81.00
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] Instructions:
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - add w2, w3, #4095
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - add w30, w29, #1, lsl #12
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - add w13, w5, #4095, lsl #12
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - add x5, x7, #1638
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - add w20, wsp, #801
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - add wsp, wsp, #1104
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - add wsp, w30, #4084
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - add x0, x24, #291
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - add x3, x24, #4095, lsl #12
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - add x8, sp, #1074
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - add sp, x29, #3816
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - sub w0, wsp, #4077
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - sub w4, w20, #546, lsl #12
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - sub sp, sp, #288
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - sub wsp, w19, #16
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - adds w13, w23, #291, lsl #12
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cmn w2, #4095
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - adds w20, wsp, #0
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cmn x3, #1, lsl #12
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cmp sp, #20, lsl #12
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cmp x30, #4095
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - subs x4, sp, #3822
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cmn w3, #291, lsl #12
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cmn wsp, #1365
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cmn sp, #1092, lsl #12
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - mov sp, x30
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - mov wsp, w20
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - mov x11, sp
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - mov w24, wsp
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - add w3, w5, w7
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - add wzr, w3, w5
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - add w20, wzr, w4
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - add w4, w6, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - add w11, w13, w15
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - add w9, w3, wzr, lsl #10
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - add w17, w29, w20, lsl #31
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - add w21, w22, w23, lsr #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - add w24, w25, w26, lsr #18
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - add w27, w28, w29, lsr #31
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - add w2, w3, w4, asr #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - add w5, w6, w7, asr #21
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - add w8, w9, w10, asr #31
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - add x3, x5, x7
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - add xzr, x3, x5
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - add x20, xzr, x4
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - add x4, x6, xzr
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - add x11, x13, x15
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - add x9, x3, xzr, lsl #10
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - add x17, x29, x20, lsl #63
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - add x21, x22, x23, lsr #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - add x24, x25, x26, lsr #18
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - add x27, x28, x29, lsr #63
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - add x2, x3, x4, asr #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - add x5, x6, x7, asr #21
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - add x8, x9, x10, asr #63
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - adds w3, w5, w7
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cmn w3, w5
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - adds w20, wzr, w4
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - adds w4, w6, wzr
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - adds w11, w13, w15
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - adds w9, w3, wzr, lsl #10
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - adds w17, w29, w20, lsl #31
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - adds w21, w22, w23, lsr #0
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - adds w24, w25, w26, lsr #18
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - adds w27, w28, w29, lsr #31
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - adds w2, w3, w4, asr #0
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - adds w5, w6, w7, asr #21
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - adds w8, w9, w10, asr #31
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - adds x3, x5, x7
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cmn x3, x5
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - adds x20, xzr, x4
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - adds x4, x6, xzr
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - adds x11, x13, x15
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - adds x9, x3, xzr, lsl #10
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - adds x17, x29, x20, lsl #63
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - adds x21, x22, x23, lsr #0
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - adds x24, x25, x26, lsr #18
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - adds x27, x28, x29, lsr #63
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - adds x2, x3, x4, asr #0
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - adds x5, x6, x7, asr #21
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - adds x8, x9, x10, asr #63
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - sub w3, w5, w7
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - sub wzr, w3, w5
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - sub w4, w6, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - sub w11, w13, w15
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sub w9, w3, wzr, lsl #10
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sub w17, w29, w20, lsl #31
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sub w21, w22, w23, lsr #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sub w24, w25, w26, lsr #18
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sub w27, w28, w29, lsr #31
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sub w2, w3, w4, asr #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sub w5, w6, w7, asr #21
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sub w8, w9, w10, asr #31
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - sub x3, x5, x7
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - sub xzr, x3, x5
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - sub x4, x6, xzr
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - sub x11, x13, x15
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sub x9, x3, xzr, lsl #10
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sub x17, x29, x20, lsl #63
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sub x21, x22, x23, lsr #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sub x24, x25, x26, lsr #18
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sub x27, x28, x29, lsr #63
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sub x2, x3, x4, asr #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sub x5, x6, x7, asr #21
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sub x8, x9, x10, asr #63
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - subs w3, w5, w7
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cmp w3, w5
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - subs w4, w6, wzr
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - subs w11, w13, w15
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - subs w9, w3, wzr, lsl #10
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - subs w17, w29, w20, lsl #31
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - subs w21, w22, w23, lsr #0
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - subs w24, w25, w26, lsr #18
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - subs w27, w28, w29, lsr #31
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - subs w2, w3, w4, asr #0
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - subs w5, w6, w7, asr #21
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - subs w8, w9, w10, asr #31
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - subs x3, x5, x7
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cmp x3, x5
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - subs x4, x6, xzr
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - subs x11, x13, x15
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - subs x9, x3, xzr, lsl #10
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - subs x17, x29, x20, lsl #63
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - subs x21, x22, x23, lsr #0
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - subs x24, x25, x26, lsr #18
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - subs x27, x28, x29, lsr #63
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - subs x2, x3, x4, asr #0
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - subs x5, x6, x7, asr #21
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - subs x8, x9, x10, asr #63
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cmn wzr, w4
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cmn w5, wzr
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cmn w6, w7
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - cmn w8, w9, lsl #15
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - cmn w10, w11, lsl #31
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - cmn w12, w13, lsr #0
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - cmn w14, w15, lsr #21
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - cmn w16, w17, lsr #31
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - cmn w18, w19, asr #0
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - cmn w20, w21, asr #22
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - cmn w22, w23, asr #31
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cmn x0, x3
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cmn xzr, x4
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cmn x5, xzr
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cmn x6, x7
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - cmn x8, x9, lsl #15
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - cmn x10, x11, lsl #63
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - cmn x12, x13, lsr #0
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - cmn x14, x15, lsr #41
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - cmn x16, x17, lsr #63
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - cmn x18, x19, asr #0
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - cmn x20, x21, asr #55
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - cmn x22, x23, asr #63
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cmp w0, w3
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cmp wzr, w4
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cmp w5, wzr
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cmp w6, w7
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - cmp w8, w9, lsl #15
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - cmp w10, w11, lsl #31
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - cmp w12, w13, lsr #0
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - cmp w14, w15, lsr #21
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - cmp w18, w19, asr #0
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - cmp w20, w21, asr #22
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - cmp w22, w23, asr #31
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cmp x0, x3
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cmp xzr, x4
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cmp x5, xzr
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cmp x6, x7
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - cmp x8, x9, lsl #15
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - cmp x10, x11, lsl #63
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - cmp x12, x13, lsr #0
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - cmp x14, x15, lsr #41
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - cmp x16, x17, lsr #63
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - cmp x18, x19, asr #0
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - cmp x20, x21, asr #55
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - cmp x22, x23, asr #63
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cmp wzr, w0
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cmp xzr, x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - adc w29, w27, w25
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - adc wzr, w3, w4
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - adc w9, wzr, w10
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - adc w20, w0, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - adc x29, x27, x25
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - adc xzr, x3, x4
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - adc x9, xzr, x10
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - adc x20, x0, xzr
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - adcs w29, w27, w25
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - adcs wzr, w3, w4
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - adcs w9, wzr, w10
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - adcs w20, w0, wzr
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - adcs x29, x27, x25
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - adcs xzr, x3, x4
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - adcs x9, xzr, x10
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - adcs x20, x0, xzr
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - sbc w29, w27, w25
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - sbc wzr, w3, w4
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ngc w9, w10
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - sbc w20, w0, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - sbc x29, x27, x25
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - sbc xzr, x3, x4
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ngc x9, x10
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - sbc x20, x0, xzr
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - sbcs w29, w27, w25
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - sbcs wzr, w3, w4
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ngcs w9, w10
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - sbcs w20, w0, wzr
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - sbcs x29, x27, x25
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - sbcs xzr, x3, x4
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ngcs x9, x10
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - sbcs x20, x0, xzr
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ngc w3, w12
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ngc wzr, w9
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ngc w23, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ngc x29, x30
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ngc xzr, x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ngc x0, xzr
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ngcs w3, w12
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ngcs wzr, w9
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ngcs w23, wzr
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ngcs x29, x30
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ngcs xzr, x0
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ngcs x0, xzr
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - sbfx x1, x2, #3, #2
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - asr x3, x4, #63
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - asr wzr, wzr, #31
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - sbfx w12, w9, #0, #1
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ubfiz x4, x5, #52, #11
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ubfx xzr, x4, #0, #1
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ubfiz x4, xzr, #1, #6
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - lsr x5, x6, #12
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - bfi x4, x5, #52, #11
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - bfxil xzr, x4, #0, #1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - bfc x4, #1, #6
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - bfxil x5, x6, #12, #52
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - sxtb w1, w2
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - sxtb xzr, w3
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - sxth w9, w10
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - sxth x0, w1
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - sxtw x3, w30
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - uxtb w1, w2
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - uxth w9, w10
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ubfx x3, x30, #0, #32
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - asr w3, w2, #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - asr w9, w10, #31
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - asr x20, x21, #63
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - asr w1, wzr, #3
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - lsr w3, w2, #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - lsr w9, w10, #31
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - lsr x20, x21, #63
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - lsr wzr, wzr, #3
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - lsr w3, w2, #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - lsl w9, w10, #31
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - lsl x20, x21, #63
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - lsl w1, wzr, #3
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - sbfx w9, w10, #0, #1
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - sbfiz x2, x3, #63, #1
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - asr x19, x20, #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - sbfiz x9, x10, #5, #59
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - asr w9, w10, #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - sbfiz w11, w12, #31, #1
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - sbfiz w13, w14, #29, #3
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - sbfiz xzr, xzr, #10, #11
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - sbfx w9, w10, #0, #1
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - asr x2, x3, #63
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - asr x19, x20, #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - asr x9, x10, #5
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - asr w9, w10, #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - asr w11, w12, #31
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - asr w13, w14, #29
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - sbfx xzr, xzr, #10, #11
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - bfxil w9, w10, #0, #1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - bfi x2, x3, #63, #1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - bfxil x19, x20, #0, #64
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - bfi x9, x10, #5, #59
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - bfxil w9, w10, #0, #32
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - bfi w11, w12, #31, #1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - bfi w13, w14, #29, #3
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - bfc xzr, #10, #11
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - bfxil w9, w10, #0, #1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - bfxil x2, x3, #63, #1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - bfxil x19, x20, #0, #64
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - bfxil x9, x10, #5, #59
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - bfxil w9, w10, #0, #32
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - bfxil w11, w12, #31, #1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - bfxil w13, w14, #29, #3
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - bfxil xzr, xzr, #10, #11
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ubfx w9, w10, #0, #1
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - lsl x2, x3, #63
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - lsr x19, x20, #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - lsl x9, x10, #5
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - lsr w9, w10, #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - lsl w11, w12, #31
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - lsl w13, w14, #29
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ubfiz xzr, xzr, #10, #11
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ubfx w9, w10, #0, #1
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - lsr x2, x3, #63
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - lsr x19, x20, #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - lsr x9, x10, #5
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - lsr w9, w10, #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - lsr w11, w12, #31
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - lsr w13, w14, #29
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ubfx xzr, xzr, #10, #11
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - cbz w5, #4
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - cbz x5, #0
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - cbnz x2, #-4
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - cbnz x26, #1048572
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - cbz wzr, #0
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - cbnz xzr, #0
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - b.ne #4
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - b.ge #1048572
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - b.ge #-4
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ccmp w1, #31, #0, eq
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ccmp w3, #0, #15, hs
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ccmp wzr, #15, #13, hs
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ccmp x9, #31, #0, le
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ccmp x3, #0, #15, gt
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ccmp xzr, #5, #7, ne
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ccmn w1, #31, #0, eq
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ccmn w3, #0, #15, hs
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ccmn wzr, #15, #13, hs
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ccmn x9, #31, #0, le
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ccmn x3, #0, #15, gt
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ccmn xzr, #5, #7, ne
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ccmp w1, wzr, #0, eq
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ccmp w3, w0, #15, hs
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ccmp wzr, w15, #13, hs
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ccmp x9, xzr, #0, le
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ccmp x3, x0, #15, gt
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ccmp xzr, x5, #7, ne
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ccmn w1, wzr, #0, eq
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ccmn w3, w0, #15, hs
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ccmn wzr, w15, #13, hs
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ccmn x9, xzr, #0, le
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ccmn x3, x0, #15, gt
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ccmn xzr, x5, #7, ne
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csel w1, w0, w19, ne
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csel wzr, w5, w9, eq
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csel w9, wzr, w30, gt
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csel w1, w28, wzr, mi
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csel x19, x23, x29, lt
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csel xzr, x3, x4, ge
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csel x5, xzr, x6, hs
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csel x7, x8, xzr, lo
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csinc w1, w0, w19, ne
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csinc wzr, w5, w9, eq
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csinc w9, wzr, w30, gt
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csinc w1, w28, wzr, mi
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csinc x19, x23, x29, lt
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csinc xzr, x3, x4, ge
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csinc x5, xzr, x6, hs
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csinc x7, x8, xzr, lo
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csinv w1, w0, w19, ne
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csinv wzr, w5, w9, eq
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csinv w9, wzr, w30, gt
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csinv w1, w28, wzr, mi
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csinv x19, x23, x29, lt
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csinv xzr, x3, x4, ge
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csinv x5, xzr, x6, hs
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csinv x7, x8, xzr, lo
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csneg w1, w0, w19, ne
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csneg wzr, w5, w9, eq
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csneg w9, wzr, w30, gt
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csneg w1, w28, wzr, mi
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csneg x19, x23, x29, lt
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csneg xzr, x3, x4, ge
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csneg x5, xzr, x6, hs
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csneg x7, x8, xzr, lo
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cset w3, eq
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cset x9, pl
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csetm w20, ne
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csetm x30, ge
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csinc w2, wzr, wzr, al
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csinv x3, xzr, xzr, nv
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cinc w3, w5, gt
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cinc wzr, w4, le
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cset w9, lt
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cinc x3, x5, gt
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cinc xzr, x4, le
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cset x9, lt
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csinc w5, w6, w6, nv
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csinc x1, x2, x2, al
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cinv w3, w5, gt
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cinv wzr, w4, le
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csetm w9, lt
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cinv x3, x5, gt
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cinv xzr, x4, le
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csetm x9, lt
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csinv x1, x0, x0, al
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csinv w9, w8, w8, nv
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cneg w3, w5, gt
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cneg wzr, w4, le
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cneg w9, wzr, lt
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cneg x3, x5, gt
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cneg xzr, x4, le
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cneg x9, xzr, lt
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csneg x4, x8, x8, al
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - csinv w9, w8, w8, nv
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - rbit w0, w7
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - rbit x18, x3
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - rev16 w17, w1
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - rev16 x5, x2
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - rev w18, w0
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - rev32 x20, x1
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - rev x22, x2
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - clz w24, w3
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - clz x26, x4
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cls w3, w5
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - cls x20, x5
+# CHECK-NEXT: - - - - - - - - - - - - 12.00 - - - - - - - - - - udiv w0, w7, w10
+# CHECK-NEXT: - - - - - - - - - - - - 20.00 - - - - - - - - - - udiv x9, x22, x4
+# CHECK-NEXT: - - - - - - - - - - - - 12.00 - - - - - - - - - - sdiv w12, w21, w0
+# CHECK-NEXT: - - - - - - - - - - - - 20.00 - - - - - - - - - - sdiv x13, x2, x1
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - lsl w11, w12, w13
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - lsl x14, x15, x16
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - lsr w17, w18, w19
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - lsr x20, x21, x22
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - asr w23, w24, w25
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - asr x26, x27, x28
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ror w0, w1, w2
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ror x3, x4, x5
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - lsl w6, w7, w8
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - lsl x9, x10, x11
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - lsr w12, w13, w14
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - lsr x15, x16, x17
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - asr w18, w19, w20
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - asr x21, x22, x23
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ror w24, w25, w26
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ror x27, x28, x29
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - smulh x30, x29, x28
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - smulh xzr, x27, x26
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - umulh x30, x29, x28
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - umulh x23, x30, xzr
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - madd w1, w3, w7, w4
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - madd wzr, w0, w9, w11
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - madd w13, wzr, w4, w4
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - madd w19, w30, wzr, w29
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - mul w4, w5, w6
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - madd x1, x3, x7, x4
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - madd xzr, x0, x9, x11
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - madd x13, xzr, x4, x4
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - madd x19, x30, xzr, x29
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - mul x4, x5, x6
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - msub w1, w3, w7, w4
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - msub wzr, w0, w9, w11
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - msub w13, wzr, w4, w4
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - msub w19, w30, wzr, w29
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - mneg w4, w5, w6
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - msub x1, x3, x7, x4
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - msub xzr, x0, x9, x11
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - msub x13, xzr, x4, x4
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - msub x19, x30, xzr, x29
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - mneg x4, x5, x6
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - smaddl x3, w5, w2, x9
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - smaddl xzr, w10, w11, x12
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - smaddl x13, wzr, w14, x15
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - smaddl x16, w17, wzr, x18
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - smull x19, w20, w21
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - smsubl x3, w5, w2, x9
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - smsubl xzr, w10, w11, x12
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - smsubl x13, wzr, w14, x15
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - smsubl x16, w17, wzr, x18
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - smnegl x19, w20, w21
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - umaddl x3, w5, w2, x9
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - umaddl xzr, w10, w11, x12
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - umaddl x13, wzr, w14, x15
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - umaddl x16, w17, wzr, x18
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - umull x19, w20, w21
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - umsubl x3, w5, w2, x9
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - umsubl x16, w17, wzr, x18
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - umnegl x19, w20, w21
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - smulh x30, x29, x28
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - smulh x23, x22, xzr
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - umulh x23, x22, xzr
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - mul x19, x20, xzr
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - mneg w21, w22, w23
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - smull x11, w13, w17
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - umull x11, w13, w17
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - smnegl x11, w13, w17
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - umnegl x11, w13, w17
+# CHECK-NEXT: - - - - - - - - - - - - 0.63 0.63 0.13 0.13 0.13 0.13 0.13 0.13 - - - extr w3, w5, w7, #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.63 0.63 0.13 0.13 0.13 0.13 0.13 0.13 - - - extr w11, w13, w17, #31
+# CHECK-NEXT: - - - - - - - - - - - - 0.63 0.63 0.13 0.13 0.13 0.13 0.13 0.13 - - - extr x3, x5, x7, #15
+# CHECK-NEXT: - - - - - - - - - - - - 0.63 0.63 0.13 0.13 0.13 0.13 0.13 0.13 - - - extr x11, x13, x17, #63
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ror x19, x23, #24
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ror x29, xzr, #63
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ror w9, w13, #31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmp s3, s5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmp s31, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmp s31, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmpe s29, s30
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmpe s15, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmpe s15, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmp d4, d12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmp d23, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmp d23, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmpe d26, d22
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmpe d29, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmpe d29, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fccmp s1, s31, #0, eq
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fccmp s3, s0, #15, hs
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fccmp s31, s15, #13, hs
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fccmp d9, d31, #0, le
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fccmp d3, d0, #15, gt
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fccmp d31, d5, #7, ne
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fccmpe s1, s31, #0, eq
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fccmpe s3, s0, #15, hs
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fccmpe s31, s15, #13, hs
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fccmpe d9, d31, #0, le
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fccmpe d3, d0, #15, gt
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fccmpe d31, d5, #7, ne
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcsel s3, s20, s9, pl
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcsel d9, d10, d11, mi
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmov s0, s1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fabs s2, s3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fneg s4, s5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 fsqrt s6, s7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcvt d8, s9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcvt h10, s11
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frintn s12, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frintp s14, s15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frintm s16, s17
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frintz s18, s19
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frinta s20, s21
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frintx s22, s23
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frinti s24, s25
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmov d0, d1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fabs d2, d3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fneg d4, d5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 8.00 fsqrt d6, d7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcvt s8, d9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcvt h10, d11
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frintn d12, d13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frintp d14, d15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frintm d16, d17
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frintz d18, d19
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frinta d20, d21
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frintx d22, d23
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frinti d24, d25
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcvt s26, h27
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcvt d28, h29
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmul s20, s19, s17
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 fdiv s1, s2, s3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fadd s4, s5, s6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fsub s7, s8, s9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmax s10, s11, s12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmin s13, s14, s15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmaxnm s16, s17, s18
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fminnm s19, s20, s21
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fnmul s22, s23, s2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmul d20, d19, d17
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 8.00 fdiv d1, d2, d3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fadd d4, d5, d6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fsub d7, d8, d9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmax d10, d11, d12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmin d13, d14, d15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmaxnm d16, d17, d18
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fminnm d19, d20, d21
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fnmul d22, d23, d24
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmadd s3, s5, s6, s31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmadd d3, d13, d0, d23
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmsub s3, s5, s6, s31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmsub d3, d13, d0, d23
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fnmadd s3, s5, s6, s31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fnmadd d3, d13, d0, d23
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fnmsub s3, s5, s6, s31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fnmsub d3, d13, d0, d23
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzs w3, h5, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzs wzr, h20, #13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzs w19, h0, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzs x3, h5, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzs x12, h30, #45
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzs x19, h0, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzs w3, s5, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzs wzr, s20, #13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzs w19, s0, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzs x3, s5, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzs x12, s30, #45
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzs x19, s0, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzs w3, d5, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzs wzr, d20, #13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzs w19, d0, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzs x3, d5, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzs x12, d30, #45
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzs x19, d0, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzu w3, h5, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzu wzr, h20, #13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzu w19, h0, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzu x3, h5, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzu x12, h30, #45
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzu x19, h0, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzu w3, s5, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzu wzr, s20, #13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzu w19, s0, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzu x3, s5, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzu x12, s30, #45
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzu x19, s0, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzu w3, d5, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzu wzr, d20, #13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzu w19, d0, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzu x3, d5, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzu x12, d30, #45
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzu x19, d0, #64
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - scvtf h23, w19, #1
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - scvtf h31, wzr, #20
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - scvtf h14, w0, #32
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - scvtf h23, x19, #1
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - scvtf h31, xzr, #20
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - scvtf h14, x0, #64
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - scvtf s23, w19, #1
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - scvtf s31, wzr, #20
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - scvtf s14, w0, #32
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - scvtf s23, x19, #1
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - scvtf s31, xzr, #20
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - scvtf s14, x0, #64
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - scvtf d23, w19, #1
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - scvtf d31, wzr, #20
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - scvtf d14, w0, #32
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - scvtf d23, x19, #1
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - scvtf d31, xzr, #20
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - scvtf d14, x0, #64
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - ucvtf h23, w19, #1
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - ucvtf h31, wzr, #20
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - ucvtf h14, w0, #32
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - ucvtf h23, x19, #1
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - ucvtf h31, xzr, #20
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - ucvtf h14, x0, #64
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - ucvtf s23, w19, #1
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - ucvtf s31, wzr, #20
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - ucvtf s14, w0, #32
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - ucvtf s23, x19, #1
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - ucvtf s31, xzr, #20
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - ucvtf s14, x0, #64
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - ucvtf d23, w19, #1
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - ucvtf d31, wzr, #20
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - ucvtf d14, w0, #32
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - ucvtf d23, x19, #1
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - ucvtf d31, xzr, #20
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - ucvtf d14, x0, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtns w3, h31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtns xzr, h12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtnu wzr, h12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtnu x0, h0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtps wzr, h9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtps x12, h20
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtpu w30, h23
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtpu x29, h3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtms w2, h3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtms x4, h5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtmu w6, h7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtmu x8, h9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzs w10, h11
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzs x12, h13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzu w14, h15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzu x15, h16
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - scvtf h17, w18
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - scvtf h19, x20
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - ucvtf h21, w22
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - scvtf h23, x24
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtas w25, h26
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtas x27, h28
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtau w29, h30
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtau xzr, h0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtns w3, s31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtns xzr, s12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtnu wzr, s12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtnu x0, s0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtps wzr, s9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtps x12, s20
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtpu w30, s23
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtpu x29, s3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtms w2, s3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtms x4, s5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtmu w6, s7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtmu x8, s9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzs w10, s11
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzs x12, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzu w14, s15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzu x15, s16
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - scvtf s17, w18
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - scvtf s19, x20
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - ucvtf s21, w22
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - scvtf s23, x24
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtas w25, s26
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtas x27, s28
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtau w29, s30
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtau xzr, s0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtns w3, d31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtns xzr, d12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtnu wzr, d12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtnu x0, d0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtps wzr, d9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtps x12, d20
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtpu w30, d23
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtpu x29, d3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtms w2, d3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtms x4, d5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtmu w6, d7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtmu x8, d9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzs w10, d11
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzs x12, d13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzu w14, d15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzu x15, d16
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - scvtf d17, w18
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - scvtf d19, x20
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - ucvtf d21, w22
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - ucvtf d23, x24
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtas w25, d26
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtas x27, d28
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtau w29, d30
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtau xzr, d0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 fmov w3, s9
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - fmov s9, w3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 fmov x20, d31
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - fmov d1, x15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 fmov x3, v12.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 0.50 fmov v1.d[1], x19
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmov s2, #0.12500000
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmov s3, #1.00000000
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmov d30, #16.00000000
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmov s4, #1.06250000
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmov d10, #1.93750000
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmov s12, #-1.00000000
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmov d16, #8.50000000
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldr w3, #0
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldr x29, #4
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrsw xzr, #-4
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldr s0, #8
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldr d0, #1048572
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldr q0, #-1048576
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - prfm pldl1strm, #0
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - prfm #22, #0
+# CHECK-NEXT: - - - 0.50 0.50 - - - - 0.33 0.33 0.83 - - - - - - - - 0.50 - - stxrb w18, w8, [sp]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - 0.33 0.33 0.83 - - - - - - - - 0.50 - - stxrh w24, w15, [x16]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - 0.33 0.33 0.83 - - - - - - - - 0.50 - - stxr w5, w6, [x17]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - 0.33 0.33 0.83 - - - - - - - - 0.50 - - stxr w1, x10, [x21]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldxrb w30, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldxrh w17, [x4]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldxr w22, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldxr x11, [x29]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldxr x11, [x29]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldxr x11, [x29]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - 0.33 0.33 0.83 - - - - - - - - 0.50 - - stxp w12, w11, w10, [sp]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - 0.33 0.33 0.83 - - - - - - - - 0.50 - - stxp wzr, x27, x9, [x12]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldxp w0, wzr, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldxp x17, x0, [x18]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldxp x17, x0, [x18]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - 0.33 0.33 0.83 - - - - - - - - 0.50 - - stlxrb w12, w22, [x0]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - 0.33 0.33 0.83 - - - - - - - - 0.50 - - stlxrh w10, w1, [x1]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - 0.33 0.33 0.83 - - - - - - - - 0.50 - - stlxr w9, w2, [x2]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - 0.33 0.33 0.83 - - - - - - - - 0.50 - - stlxr w9, x3, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldaxrb w8, [x4]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldaxrh w7, [x5]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldaxr w6, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldaxr x5, [x6]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldaxr x5, [x6]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldaxr x5, [x6]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - 0.33 0.33 0.83 - - - - - - - - 0.50 - - stlxp w4, w5, w6, [sp]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - 0.33 0.33 0.83 - - - - - - - - 0.50 - - stlxp wzr, x6, x7, [x1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldaxp w5, w18, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldaxp x6, x19, [x22]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldaxp x6, x19, [x22]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - stlrb w24, [sp]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - stlrh w25, [x30]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - stlr w26, [x29]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - stlr x27, [x28]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - stlr x27, [x28]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - stlr x27, [x28]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldarb w23, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldarh w22, [x30]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldar wzr, [x29]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldar x21, [x28]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldar x21, [x28]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldar x21, [x28]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - sturb w9, [sp]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - sturh wzr, [x12, #255]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - stur w16, [x0, #-256]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - stur x28, [x14, #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldurb w1, [x20, #255]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldurh w20, [x1, #255]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldur w12, [sp, #255]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldur xzr, [x12, #255]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldursb x9, [x7, #-256]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldursh x17, [x19, #-256]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldursw x20, [x15, #-256]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfum pldl2keep, [sp, #-256]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldursb w19, [x1, #-256]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldursh w15, [x21, #-256]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 stur b0, [sp, #1]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 stur h12, [x12, #-1]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 stur s15, [x0, #255]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 stur d31, [x5, #25]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 stur q9, [x5]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldur b3, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldur h5, [x4, #-256]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldur s7, [x12, #-1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldur d11, [x19, #4]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldur q13, [x1, #2]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - strb w9, [x2], #255
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - strb w10, [x3], #1
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - strb w10, [x3], #-256
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - strh w9, [x2], #255
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - strh w9, [x2], #1
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - strh w10, [x3], #-256
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - str w19, [sp], #255
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - str w20, [x30], #1
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - str w21, [x12], #-256
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - str xzr, [x9], #255
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - str x2, [x3], #1
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - str x19, [x12], #-256
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrb w9, [x2], #255
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrb w10, [x3], #1
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrb w10, [x3], #-256
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrh w9, [x2], #255
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrh w9, [x2], #1
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrh w10, [x3], #-256
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldr w19, [sp], #255
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldr w20, [x30], #1
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldr w21, [x12], #-256
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldr xzr, [x9], #255
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldr x2, [x3], #1
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldr x19, [x12], #-256
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrsb xzr, [x9], #255
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrsb x2, [x3], #1
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrsb x19, [x12], #-256
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrsh xzr, [x9], #255
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrsh x2, [x3], #1
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrsh x19, [x12], #-256
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrsw xzr, [x9], #255
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrsw x2, [x3], #1
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrsw x19, [x12], #-256
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrsb wzr, [x9], #255
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrsb w2, [x3], #1
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrsb w19, [x12], #-256
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrsh wzr, [x9], #255
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrsh w2, [x3], #1
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrsh w19, [x12], #-256
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 str b0, [x0], #255
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 str b3, [x3], #1
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 str b5, [sp], #-256
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 str h10, [x10], #255
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 str h13, [x23], #1
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 str h15, [sp], #-256
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 str s20, [x20], #255
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 str s23, [x23], #1
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 str s25, [x0], #-256
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 str d20, [x20], #255
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 str d23, [x23], #1
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 str d25, [x0], #-256
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - ldr b0, [x0], #255
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - ldr b3, [x3], #1
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - ldr b5, [sp], #-256
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - ldr h10, [x10], #255
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - ldr h13, [x23], #1
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - ldr h15, [sp], #-256
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - ldr s20, [x20], #255
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - ldr s23, [x23], #1
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - ldr s25, [x0], #-256
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - ldr d20, [x20], #255
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - ldr d23, [x23], #1
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - ldr d25, [x0], #-256
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - ldr q20, [x1], #255
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - ldr q23, [x9], #1
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - ldr q25, [x20], #-256
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 str q10, [x1], #255
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 str q22, [sp], #1
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 str q21, [x20], #-256
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldr x3, [x4, #0]!
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - strb w9, [x2, #255]!
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - strb w10, [x3, #1]!
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - strb w10, [x3, #-256]!
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - strh w9, [x2, #255]!
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - strh w9, [x2, #1]!
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - strh w10, [x3, #-256]!
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - str w19, [sp, #255]!
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - str w20, [x30, #1]!
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - str w21, [x12, #-256]!
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - str xzr, [x9, #255]!
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - str x2, [x3, #1]!
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - str x19, [x12, #-256]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrb w9, [x2, #255]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrb w10, [x3, #1]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrb w10, [x3, #-256]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrh w9, [x2, #255]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrh w9, [x2, #1]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrh w10, [x3, #-256]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldr w19, [sp, #255]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldr w20, [x30, #1]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldr w21, [x12, #-256]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldr xzr, [x9, #255]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldr x2, [x3, #1]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldr x19, [x12, #-256]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrsb xzr, [x9, #255]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrsb x2, [x3, #1]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrsb x19, [x12, #-256]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrsh xzr, [x9, #255]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrsh x2, [x3, #1]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrsh x19, [x12, #-256]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrsw xzr, [x9, #255]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrsw x2, [x3, #1]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrsw x19, [x12, #-256]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrsb wzr, [x9, #255]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrsb w2, [x3, #1]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrsb w19, [x12, #-256]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrsh wzr, [x9, #255]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrsh w2, [x3, #1]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldrsh w19, [x12, #-256]!
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 str b0, [x0, #255]!
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 str b3, [x3, #1]!
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 str b5, [sp, #-256]!
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 str h10, [x10, #255]!
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 str h13, [x23, #1]!
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 str h15, [sp, #-256]!
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 str s20, [x20, #255]!
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 str s23, [x23, #1]!
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 str s25, [x0, #-256]!
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 str d20, [x20, #255]!
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 str d23, [x23, #1]!
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 str d25, [x0, #-256]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - ldr b0, [x0, #255]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - ldr b3, [x3, #1]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - ldr b5, [sp, #-256]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - ldr h10, [x10, #255]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - ldr h13, [x23, #1]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - ldr h15, [sp, #-256]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - ldr s20, [x20, #255]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - ldr s23, [x23, #1]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - ldr s25, [x0, #-256]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - ldr d20, [x20, #255]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - ldr d23, [x23, #1]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - ldr d25, [x0, #-256]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - ldr q20, [x1, #255]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - ldr q23, [x9, #1]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - ldr q25, [x20, #-256]!
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 str q10, [x1, #255]!
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 str q22, [sp, #1]!
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 str q21, [x20, #-256]!
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - sttrb w9, [sp]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - sttrh wzr, [x12, #255]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - sttr w16, [x0, #-256]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - sttr x28, [x14, #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldtrb w1, [x20, #255]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldtrh w20, [x1, #255]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldtr w12, [sp, #255]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldtr xzr, [x12, #255]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldtrsb x9, [x7, #-256]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldtrsh x17, [x19, #-256]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldtrsw x20, [x15, #-256]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldtrsb w19, [x1, #-256]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldtrsh w15, [x21, #-256]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldr x4, [x29]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldr x30, [x12, #32760]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldr x20, [sp, #8]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldr xzr, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldr w2, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldr w17, [sp, #16380]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldr w13, [x2, #4]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldrsw x2, [x5, #4]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldrsw x23, [sp, #16380]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldrh w2, [x4]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldrsh w23, [x6, #8190]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldrsh wzr, [sp, #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldrsh x29, [x2, #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldrb w26, [x3, #121]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldrb w12, [x2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldrsb w27, [sp, #4095]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldrsb xzr, [x15]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - str x30, [sp]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - str w20, [x4, #16380]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - strh w17, [sp, #8190]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - strb w23, [x3, #4095]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - strb wzr, [x2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldr b31, [sp, #4095]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldr h20, [x2, #8190]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldr s10, [x19, #16380]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldr d3, [x10, #32760]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 str q12, [sp, #65520]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldr h3, [sp, x5]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldr h9, [x27, x6]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldr h10, [x30, x7, lsl #1]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 str h11, [x29, x3, sxtx]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 str h12, [x28, xzr, sxtx]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 0.50 0.50 str h13, [x27, x5, sxtx #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldr h14, [x26, w6, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldr h15, [x25, w7, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldr h16, [x24, w8, uxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldr h17, [x23, w9, sxtw]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 str h18, [x22, w10, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldr h19, [x21, wzr, sxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldrb w3, [sp, x5]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldrb w9, [x27, x6]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldrsb w10, [x30, x7]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldrb w11, [x29, x3, sxtx]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - strb w12, [x28, xzr, sxtx]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldrb w14, [x26, w6, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldrsb w15, [x25, w7, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldrb w17, [x23, w9, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldrsb x18, [x22, w10, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldrsh w3, [sp, x5]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldrsh w9, [x27, x6]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldrh w10, [x30, x7, lsl #1]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - strh w11, [x29, x3, sxtx]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldrh w12, [x28, xzr, sxtx]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldrsh x13, [x27, x5, sxtx #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldrh w14, [x26, w6, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldrh w15, [x25, w7, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldrsh w16, [x24, w8, uxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldrh w17, [x23, w9, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldrh w18, [x22, w10, sxtw]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - strh w19, [x21, wzr, sxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldr w3, [sp, x5]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldr s9, [x27, x6]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldr w10, [x30, x7, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldr w11, [x29, x3, sxtx]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 str s12, [x28, xzr, sxtx]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - str w13, [x27, x5, sxtx #2]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - str w14, [x26, w6, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldr w15, [x25, w7, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldr w16, [x24, w8, uxtw #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldrsw x17, [x23, w9, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldr w18, [x22, w10, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldrsw x19, [x21, wzr, sxtw #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldr x3, [sp, x5]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - str x9, [x27, x6]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldr d10, [x30, x7, lsl #3]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - str x11, [x29, x3, sxtx]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldr x12, [x28, xzr, sxtx]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldr x13, [x27, x5, sxtx #3]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfm pldl1keep, [x26, w6, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldr x15, [x25, w7, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldr x16, [x24, w8, uxtw #3]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldr x17, [x23, w9, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldr x18, [x22, w10, sxtw]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 str d19, [x21, wzr, sxtw #3]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldr q3, [sp, x5]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldr q9, [x27, x6]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldr q10, [x30, x7, lsl #4]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 str q11, [x29, x3, sxtx]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 str q12, [x28, xzr, sxtx]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 0.50 0.50 str q13, [x27, x5, sxtx #4]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldr q14, [x26, w6, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldr q15, [x25, w7, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldr q16, [x24, w8, uxtw #4]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldr q17, [x23, w9, sxtw]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 str q18, [x22, w10, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldr q19, [x21, wzr, sxtw #4]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldp w3, w5, [sp]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - stp wzr, w9, [sp, #252]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldp w2, wzr, [sp, #-256]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldp w9, w10, [sp, #4]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldpsw x9, x10, [sp, #4]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldpsw x9, x10, [x2, #-256]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldpsw x20, x30, [sp, #252]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldp x21, x29, [x2, #504]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldp x22, x23, [x3, #-512]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldp x24, x25, [x4, #8]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldp s29, s28, [sp, #252]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 stp s27, s26, [sp, #-256]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldp s1, s2, [x3, #44]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 stp d3, d5, [x9, #504]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 stp d7, d11, [x10, #-512]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldp d2, d3, [x30, #-8]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 1.00 1.00 stp q3, q5, [sp]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 1.00 1.00 stp q17, q19, [sp, #1008]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - - - ldp q23, q29, [x1, #-1024]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldp w3, w5, [sp], #0
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - stp wzr, w9, [sp], #252
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldp w2, wzr, [sp], #-256
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldp w9, w10, [sp], #4
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - ldpsw x9, x10, [sp], #4
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - ldpsw x9, x10, [x2], #-256
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - ldpsw x20, x30, [sp], #252
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldp x21, x29, [x2], #504
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldp x22, x23, [x3], #-512
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldp x24, x25, [x4], #8
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - ldp s29, s28, [sp], #252
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 stp s27, s26, [sp], #-256
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - ldp s1, s2, [x3], #44
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 stp d3, d5, [x9], #504
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 stp d7, d11, [x10], #-512
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - ldp d2, d3, [x30], #-8
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 1.00 1.00 stp q3, q5, [sp], #0
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 1.00 1.00 stp q17, q19, [sp], #1008
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 0.38 0.38 0.38 0.38 0.38 0.38 0.38 0.38 - - - ldp q23, q29, [x1], #-1024
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldp w3, w5, [sp, #0]!
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 - - stp wzr, w9, [sp, #252]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldp w2, wzr, [sp, #-256]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldp w9, w10, [sp, #4]!
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - ldpsw x9, x10, [sp, #4]!
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - ldpsw x9, x10, [x2, #-256]!
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - ldpsw x20, x30, [sp, #252]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldp x21, x29, [x2, #504]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldp x22, x23, [x3, #-512]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldp x24, x25, [x4, #8]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - ldp s29, s28, [sp, #252]!
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 stp s27, s26, [sp, #-256]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - ldp s1, s2, [x3, #44]!
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 stp d3, d5, [x9, #504]!
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 0.50 0.50 stp d7, d11, [x10, #-512]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - - - ldp d2, d3, [x30, #-8]!
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 1.00 1.00 stp q3, q5, [sp, #0]!
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.50 1.00 1.00 stp q17, q19, [sp, #1008]!
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 0.38 0.38 0.38 0.38 0.38 0.38 0.38 0.38 - - - ldp q23, q29, [x1, #-1024]!
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnp w3, w5, [sp]
+# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 - - - - - - - - 0.50 - - stnp wzr, w9, [sp, #252]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnp w2, wzr, [sp, #-256]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnp w9, w10, [sp, #4]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnp x21, x29, [x2, #504]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnp x22, x23, [x3, #-512]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnp x24, x25, [x4, #8]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnp s29, s28, [sp, #252]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 stnp s27, s26, [sp, #-256]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnp s1, s2, [x3, #44]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 stnp d3, d5, [x9, #504]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 stnp d7, d11, [x10, #-512]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnp d2, d3, [x30, #-8]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 1.00 1.00 stnp q3, q5, [sp]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 1.00 1.00 stnp q17, q19, [sp, #1008]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - - - ldnp q23, q29, [x1, #-1024]
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - mov w3, #983055
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - mov x10, #-6148914691236517206
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ands w4, w4, #0xf000f
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ands x11, x11, #0xaaaaaaaaaaaaaaaa
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - and w12, w23, w21
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - and w16, w15, w1, lsl #1
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - and w9, w4, w10, lsl #31
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - and w3, w30, w11
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - and x3, x5, x7, lsl #63
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - and x5, x14, x19, asr #4
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - and w3, w17, w19, ror #31
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - and w0, w2, wzr, lsr #17
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - and w3, w30, w11, asr #2
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - and xzr, x4, x26
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - and w3, wzr, w20, ror #2
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - and x7, x20, xzr, asr #63
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - bic x13, x20, x14, lsl #47
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - bic w2, w7, w9
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - orr w2, w7, w0, asr #31
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - orr x8, x9, x10, lsl #12
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - orn x3, x5, x7, asr #2
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - orn w2, w5, w29
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - ands w7, wzr, w9, lsl #1
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - ands x3, x5, x20, ror #63
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - bics w3, w5, w7
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - bics x3, xzr, x3, lsl #1
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - tst w3, w7, lsl #31
+# CHECK-NEXT: - - - - - 0.25 0.25 0.25 0.25 - - - 0.50 0.50 - - - - - - - - - tst x2, x20, asr #2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - mov x3, x6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - mov x3, xzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - mov wzr, w2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - mov w3, w5
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - movz w2, #0, lsl #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - mov w2, #-1235
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - mov x2, #5299989643264
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - mov x2, #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - movk w3, #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - movz x4, #0, lsl #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - movk w5, #0, lsl #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - movz x6, #0, lsl #32
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - movk x7, #0, lsl #32
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - movz x8, #0, lsl #48
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - movk x9, #0, lsl #48
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - adr x2, #1600
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - adrp x21, #6553600
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - adr x0, #262144
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - tbz x12, #62, #0
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - tbz x12, #62, #4
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - tbz x12, #62, #-32768
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - tbnz x12, #60, #32764
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - b #4
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - b #-4
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - b #134217724
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - br x20
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - 0.17 0.17 0.17 0.17 0.17 0.17 - - - blr xzr
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - ret x10
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - ret
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - eret
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - - - - - - - - - drps
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-clear-upper-regs.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-clear-upper-regs.s
new file mode 100644
index 0000000..16c37aa
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-clear-upper-regs.s
@@ -0,0 +1,872 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=neoverse-v3ae --timeline --timeline-max-iterations=4 < %s | FileCheck %s
+
+# LLVM-MCA-BEGIN FPR8-bit
+ldr b0, [sp]
+add z0.d, z0.d, z0.d
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN FPR16-bit
+ldr h0, [sp]
+add z0.d, z0.d, z0.d
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN FPR32-bit
+ldr s0, [sp]
+add z0.d, z0.d, z0.d
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN FPR64-bit
+ldr d0, [sp]
+add z0.d, z0.d, z0.d
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN FPR128-bit
+ldr q0, [sp]
+add z0.d, z0.d, z0.d
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN SIMD64-bit-b
+ld1 {v0.8b}, [sp]
+add z0.d, z0.d, z0.d
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN SIMD64-bit-h
+ld1 {v0.4h}, [sp]
+add z0.d, z0.d, z0.d
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN SIMD64-bit-s
+ld1 {v0.2s}, [sp]
+add z0.d, z0.d, z0.d
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN SIMD64-bit-d
+ld1 {v0.1d}, [sp]
+add z0.d, z0.d, z0.d
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN insr
+insr z0.s, w0
+add z0.s, z0.s, z0.s
+# LLVM-MCA-END
+
+# CHECK: [0] Code Region - FPR8-bit
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 200
+# CHECK-NEXT: Total Cycles: 60
+# CHECK-NEXT: Total uOps: 200
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.33
+# CHECK-NEXT: IPC: 3.33
+# CHECK-NEXT: Block RThroughput: 0.5
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 6 0.33 * ldr b0, [sp]
+# CHECK-NEXT: 1 2 0.50 add z0.d, z0.d, z0.d
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - V3AEUnitB
+# CHECK-NEXT: [0.1] - V3AEUnitB
+# CHECK-NEXT: [0.2] - V3AEUnitB
+# CHECK-NEXT: [1.0] - V3AEUnitD
+# CHECK-NEXT: [1.1] - V3AEUnitD
+# CHECK-NEXT: [2.0] - V3AEUnitFlg
+# CHECK-NEXT: [2.1] - V3AEUnitFlg
+# CHECK-NEXT: [2.2] - V3AEUnitFlg
+# CHECK-NEXT: [2.3] - V3AEUnitFlg
+# CHECK-NEXT: [3.0] - V3AEUnitL12
+# CHECK-NEXT: [3.1] - V3AEUnitL12
+# CHECK-NEXT: [4] - V3AEUnitLS0
+# CHECK-NEXT: [5] - V3AEUnitM0
+# CHECK-NEXT: [6] - V3AEUnitM1
+# CHECK-NEXT: [7] - V3AEUnitS0
+# CHECK-NEXT: [8] - V3AEUnitS1
+# CHECK-NEXT: [9] - V3AEUnitS2
+# CHECK-NEXT: [10] - V3AEUnitS3
+# CHECK-NEXT: [11] - V3AEUnitS4
+# CHECK-NEXT: [12] - V3AEUnitS5
+# CHECK-NEXT: [13] - V3AEUnitST1
+# CHECK-NEXT: [14] - V3AEUnitV0
+# CHECK-NEXT: [15] - V3AEUnitV1
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.34 - - - - - - - - - 0.50 0.50
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] Instructions:
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.34 - - - - - - - - - - - ldr b0, [sp]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z0.d, z0.d, z0.d
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER .. ldr b0, [sp]
+# CHECK-NEXT: [0,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,0] DeeeeeeE--R. ldr b0, [sp]
+# CHECK-NEXT: [1,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [2,0] DeeeeeeE--R. ldr b0, [sp]
+# CHECK-NEXT: [2,1] D=======eeER add z0.d, z0.d, z0.d
+# CHECK-NEXT: [3,0] D=eeeeeeE--R ldr b0, [sp]
+# CHECK-NEXT: [3,1] D=======eeER add z0.d, z0.d, z0.d
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 4 1.3 1.3 1.5 ldr b0, [sp]
+# CHECK-NEXT: 1. 4 7.5 0.3 0.0 add z0.d, z0.d, z0.d
+# CHECK-NEXT: 4 4.4 0.8 0.8 <total>
+
+# CHECK: [1] Code Region - FPR16-bit
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 200
+# CHECK-NEXT: Total Cycles: 60
+# CHECK-NEXT: Total uOps: 200
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.33
+# CHECK-NEXT: IPC: 3.33
+# CHECK-NEXT: Block RThroughput: 0.5
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 6 0.33 * ldr h0, [sp]
+# CHECK-NEXT: 1 2 0.50 add z0.d, z0.d, z0.d
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - V3AEUnitB
+# CHECK-NEXT: [0.1] - V3AEUnitB
+# CHECK-NEXT: [0.2] - V3AEUnitB
+# CHECK-NEXT: [1.0] - V3AEUnitD
+# CHECK-NEXT: [1.1] - V3AEUnitD
+# CHECK-NEXT: [2.0] - V3AEUnitFlg
+# CHECK-NEXT: [2.1] - V3AEUnitFlg
+# CHECK-NEXT: [2.2] - V3AEUnitFlg
+# CHECK-NEXT: [2.3] - V3AEUnitFlg
+# CHECK-NEXT: [3.0] - V3AEUnitL12
+# CHECK-NEXT: [3.1] - V3AEUnitL12
+# CHECK-NEXT: [4] - V3AEUnitLS0
+# CHECK-NEXT: [5] - V3AEUnitM0
+# CHECK-NEXT: [6] - V3AEUnitM1
+# CHECK-NEXT: [7] - V3AEUnitS0
+# CHECK-NEXT: [8] - V3AEUnitS1
+# CHECK-NEXT: [9] - V3AEUnitS2
+# CHECK-NEXT: [10] - V3AEUnitS3
+# CHECK-NEXT: [11] - V3AEUnitS4
+# CHECK-NEXT: [12] - V3AEUnitS5
+# CHECK-NEXT: [13] - V3AEUnitST1
+# CHECK-NEXT: [14] - V3AEUnitV0
+# CHECK-NEXT: [15] - V3AEUnitV1
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.34 - - - - - - - - - 0.50 0.50
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] Instructions:
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.34 - - - - - - - - - - - ldr h0, [sp]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z0.d, z0.d, z0.d
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER .. ldr h0, [sp]
+# CHECK-NEXT: [0,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,0] DeeeeeeE--R. ldr h0, [sp]
+# CHECK-NEXT: [1,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [2,0] DeeeeeeE--R. ldr h0, [sp]
+# CHECK-NEXT: [2,1] D=======eeER add z0.d, z0.d, z0.d
+# CHECK-NEXT: [3,0] D=eeeeeeE--R ldr h0, [sp]
+# CHECK-NEXT: [3,1] D=======eeER add z0.d, z0.d, z0.d
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 4 1.3 1.3 1.5 ldr h0, [sp]
+# CHECK-NEXT: 1. 4 7.5 0.3 0.0 add z0.d, z0.d, z0.d
+# CHECK-NEXT: 4 4.4 0.8 0.8 <total>
+
+# CHECK: [2] Code Region - FPR32-bit
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 200
+# CHECK-NEXT: Total Cycles: 60
+# CHECK-NEXT: Total uOps: 200
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.33
+# CHECK-NEXT: IPC: 3.33
+# CHECK-NEXT: Block RThroughput: 0.5
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 6 0.33 * ldr s0, [sp]
+# CHECK-NEXT: 1 2 0.50 add z0.d, z0.d, z0.d
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - V3AEUnitB
+# CHECK-NEXT: [0.1] - V3AEUnitB
+# CHECK-NEXT: [0.2] - V3AEUnitB
+# CHECK-NEXT: [1.0] - V3AEUnitD
+# CHECK-NEXT: [1.1] - V3AEUnitD
+# CHECK-NEXT: [2.0] - V3AEUnitFlg
+# CHECK-NEXT: [2.1] - V3AEUnitFlg
+# CHECK-NEXT: [2.2] - V3AEUnitFlg
+# CHECK-NEXT: [2.3] - V3AEUnitFlg
+# CHECK-NEXT: [3.0] - V3AEUnitL12
+# CHECK-NEXT: [3.1] - V3AEUnitL12
+# CHECK-NEXT: [4] - V3AEUnitLS0
+# CHECK-NEXT: [5] - V3AEUnitM0
+# CHECK-NEXT: [6] - V3AEUnitM1
+# CHECK-NEXT: [7] - V3AEUnitS0
+# CHECK-NEXT: [8] - V3AEUnitS1
+# CHECK-NEXT: [9] - V3AEUnitS2
+# CHECK-NEXT: [10] - V3AEUnitS3
+# CHECK-NEXT: [11] - V3AEUnitS4
+# CHECK-NEXT: [12] - V3AEUnitS5
+# CHECK-NEXT: [13] - V3AEUnitST1
+# CHECK-NEXT: [14] - V3AEUnitV0
+# CHECK-NEXT: [15] - V3AEUnitV1
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.34 - - - - - - - - - 0.50 0.50
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] Instructions:
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.34 - - - - - - - - - - - ldr s0, [sp]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z0.d, z0.d, z0.d
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER .. ldr s0, [sp]
+# CHECK-NEXT: [0,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,0] DeeeeeeE--R. ldr s0, [sp]
+# CHECK-NEXT: [1,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [2,0] DeeeeeeE--R. ldr s0, [sp]
+# CHECK-NEXT: [2,1] D=======eeER add z0.d, z0.d, z0.d
+# CHECK-NEXT: [3,0] D=eeeeeeE--R ldr s0, [sp]
+# CHECK-NEXT: [3,1] D=======eeER add z0.d, z0.d, z0.d
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 4 1.3 1.3 1.5 ldr s0, [sp]
+# CHECK-NEXT: 1. 4 7.5 0.3 0.0 add z0.d, z0.d, z0.d
+# CHECK-NEXT: 4 4.4 0.8 0.8 <total>
+
+# CHECK: [3] Code Region - FPR64-bit
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 200
+# CHECK-NEXT: Total Cycles: 60
+# CHECK-NEXT: Total uOps: 200
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.33
+# CHECK-NEXT: IPC: 3.33
+# CHECK-NEXT: Block RThroughput: 0.5
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 6 0.33 * ldr d0, [sp]
+# CHECK-NEXT: 1 2 0.50 add z0.d, z0.d, z0.d
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - V3AEUnitB
+# CHECK-NEXT: [0.1] - V3AEUnitB
+# CHECK-NEXT: [0.2] - V3AEUnitB
+# CHECK-NEXT: [1.0] - V3AEUnitD
+# CHECK-NEXT: [1.1] - V3AEUnitD
+# CHECK-NEXT: [2.0] - V3AEUnitFlg
+# CHECK-NEXT: [2.1] - V3AEUnitFlg
+# CHECK-NEXT: [2.2] - V3AEUnitFlg
+# CHECK-NEXT: [2.3] - V3AEUnitFlg
+# CHECK-NEXT: [3.0] - V3AEUnitL12
+# CHECK-NEXT: [3.1] - V3AEUnitL12
+# CHECK-NEXT: [4] - V3AEUnitLS0
+# CHECK-NEXT: [5] - V3AEUnitM0
+# CHECK-NEXT: [6] - V3AEUnitM1
+# CHECK-NEXT: [7] - V3AEUnitS0
+# CHECK-NEXT: [8] - V3AEUnitS1
+# CHECK-NEXT: [9] - V3AEUnitS2
+# CHECK-NEXT: [10] - V3AEUnitS3
+# CHECK-NEXT: [11] - V3AEUnitS4
+# CHECK-NEXT: [12] - V3AEUnitS5
+# CHECK-NEXT: [13] - V3AEUnitST1
+# CHECK-NEXT: [14] - V3AEUnitV0
+# CHECK-NEXT: [15] - V3AEUnitV1
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.34 - - - - - - - - - 0.50 0.50
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] Instructions:
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.34 - - - - - - - - - - - ldr d0, [sp]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z0.d, z0.d, z0.d
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER .. ldr d0, [sp]
+# CHECK-NEXT: [0,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,0] DeeeeeeE--R. ldr d0, [sp]
+# CHECK-NEXT: [1,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [2,0] DeeeeeeE--R. ldr d0, [sp]
+# CHECK-NEXT: [2,1] D=======eeER add z0.d, z0.d, z0.d
+# CHECK-NEXT: [3,0] D=eeeeeeE--R ldr d0, [sp]
+# CHECK-NEXT: [3,1] D=======eeER add z0.d, z0.d, z0.d
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 4 1.3 1.3 1.5 ldr d0, [sp]
+# CHECK-NEXT: 1. 4 7.5 0.3 0.0 add z0.d, z0.d, z0.d
+# CHECK-NEXT: 4 4.4 0.8 0.8 <total>
+
+# CHECK: [4] Code Region - FPR128-bit
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 200
+# CHECK-NEXT: Total Cycles: 60
+# CHECK-NEXT: Total uOps: 200
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.33
+# CHECK-NEXT: IPC: 3.33
+# CHECK-NEXT: Block RThroughput: 0.5
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 6 0.33 * ldr q0, [sp]
+# CHECK-NEXT: 1 2 0.50 add z0.d, z0.d, z0.d
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - V3AEUnitB
+# CHECK-NEXT: [0.1] - V3AEUnitB
+# CHECK-NEXT: [0.2] - V3AEUnitB
+# CHECK-NEXT: [1.0] - V3AEUnitD
+# CHECK-NEXT: [1.1] - V3AEUnitD
+# CHECK-NEXT: [2.0] - V3AEUnitFlg
+# CHECK-NEXT: [2.1] - V3AEUnitFlg
+# CHECK-NEXT: [2.2] - V3AEUnitFlg
+# CHECK-NEXT: [2.3] - V3AEUnitFlg
+# CHECK-NEXT: [3.0] - V3AEUnitL12
+# CHECK-NEXT: [3.1] - V3AEUnitL12
+# CHECK-NEXT: [4] - V3AEUnitLS0
+# CHECK-NEXT: [5] - V3AEUnitM0
+# CHECK-NEXT: [6] - V3AEUnitM1
+# CHECK-NEXT: [7] - V3AEUnitS0
+# CHECK-NEXT: [8] - V3AEUnitS1
+# CHECK-NEXT: [9] - V3AEUnitS2
+# CHECK-NEXT: [10] - V3AEUnitS3
+# CHECK-NEXT: [11] - V3AEUnitS4
+# CHECK-NEXT: [12] - V3AEUnitS5
+# CHECK-NEXT: [13] - V3AEUnitST1
+# CHECK-NEXT: [14] - V3AEUnitV0
+# CHECK-NEXT: [15] - V3AEUnitV1
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.34 - - - - - - - - - 0.50 0.50
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] Instructions:
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.34 - - - - - - - - - - - ldr q0, [sp]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z0.d, z0.d, z0.d
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER .. ldr q0, [sp]
+# CHECK-NEXT: [0,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,0] DeeeeeeE--R. ldr q0, [sp]
+# CHECK-NEXT: [1,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [2,0] DeeeeeeE--R. ldr q0, [sp]
+# CHECK-NEXT: [2,1] D=======eeER add z0.d, z0.d, z0.d
+# CHECK-NEXT: [3,0] D=eeeeeeE--R ldr q0, [sp]
+# CHECK-NEXT: [3,1] D=======eeER add z0.d, z0.d, z0.d
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 4 1.3 1.3 1.5 ldr q0, [sp]
+# CHECK-NEXT: 1. 4 7.5 0.3 0.0 add z0.d, z0.d, z0.d
+# CHECK-NEXT: 4 4.4 0.8 0.8 <total>
+
+# CHECK: [5] Code Region - SIMD64-bit-b
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 200
+# CHECK-NEXT: Total Cycles: 60
+# CHECK-NEXT: Total uOps: 200
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.33
+# CHECK-NEXT: IPC: 3.33
+# CHECK-NEXT: Block RThroughput: 0.5
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 6 0.33 * ld1 { v0.8b }, [sp]
+# CHECK-NEXT: 1 2 0.50 add z0.d, z0.d, z0.d
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - V3AEUnitB
+# CHECK-NEXT: [0.1] - V3AEUnitB
+# CHECK-NEXT: [0.2] - V3AEUnitB
+# CHECK-NEXT: [1.0] - V3AEUnitD
+# CHECK-NEXT: [1.1] - V3AEUnitD
+# CHECK-NEXT: [2.0] - V3AEUnitFlg
+# CHECK-NEXT: [2.1] - V3AEUnitFlg
+# CHECK-NEXT: [2.2] - V3AEUnitFlg
+# CHECK-NEXT: [2.3] - V3AEUnitFlg
+# CHECK-NEXT: [3.0] - V3AEUnitL12
+# CHECK-NEXT: [3.1] - V3AEUnitL12
+# CHECK-NEXT: [4] - V3AEUnitLS0
+# CHECK-NEXT: [5] - V3AEUnitM0
+# CHECK-NEXT: [6] - V3AEUnitM1
+# CHECK-NEXT: [7] - V3AEUnitS0
+# CHECK-NEXT: [8] - V3AEUnitS1
+# CHECK-NEXT: [9] - V3AEUnitS2
+# CHECK-NEXT: [10] - V3AEUnitS3
+# CHECK-NEXT: [11] - V3AEUnitS4
+# CHECK-NEXT: [12] - V3AEUnitS5
+# CHECK-NEXT: [13] - V3AEUnitST1
+# CHECK-NEXT: [14] - V3AEUnitV0
+# CHECK-NEXT: [15] - V3AEUnitV1
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.34 - - - - - - - - - 0.50 0.50
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] Instructions:
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.34 - - - - - - - - - - - ld1 { v0.8b }, [sp]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z0.d, z0.d, z0.d
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER .. ld1 { v0.8b }, [sp]
+# CHECK-NEXT: [0,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,0] DeeeeeeE--R. ld1 { v0.8b }, [sp]
+# CHECK-NEXT: [1,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [2,0] DeeeeeeE--R. ld1 { v0.8b }, [sp]
+# CHECK-NEXT: [2,1] D=======eeER add z0.d, z0.d, z0.d
+# CHECK-NEXT: [3,0] D=eeeeeeE--R ld1 { v0.8b }, [sp]
+# CHECK-NEXT: [3,1] D=======eeER add z0.d, z0.d, z0.d
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 4 1.3 1.3 1.5 ld1 { v0.8b }, [sp]
+# CHECK-NEXT: 1. 4 7.5 0.3 0.0 add z0.d, z0.d, z0.d
+# CHECK-NEXT: 4 4.4 0.8 0.8 <total>
+
+# CHECK: [6] Code Region - SIMD64-bit-h
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 200
+# CHECK-NEXT: Total Cycles: 60
+# CHECK-NEXT: Total uOps: 200
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.33
+# CHECK-NEXT: IPC: 3.33
+# CHECK-NEXT: Block RThroughput: 0.5
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 6 0.33 * ld1 { v0.4h }, [sp]
+# CHECK-NEXT: 1 2 0.50 add z0.d, z0.d, z0.d
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - V3AEUnitB
+# CHECK-NEXT: [0.1] - V3AEUnitB
+# CHECK-NEXT: [0.2] - V3AEUnitB
+# CHECK-NEXT: [1.0] - V3AEUnitD
+# CHECK-NEXT: [1.1] - V3AEUnitD
+# CHECK-NEXT: [2.0] - V3AEUnitFlg
+# CHECK-NEXT: [2.1] - V3AEUnitFlg
+# CHECK-NEXT: [2.2] - V3AEUnitFlg
+# CHECK-NEXT: [2.3] - V3AEUnitFlg
+# CHECK-NEXT: [3.0] - V3AEUnitL12
+# CHECK-NEXT: [3.1] - V3AEUnitL12
+# CHECK-NEXT: [4] - V3AEUnitLS0
+# CHECK-NEXT: [5] - V3AEUnitM0
+# CHECK-NEXT: [6] - V3AEUnitM1
+# CHECK-NEXT: [7] - V3AEUnitS0
+# CHECK-NEXT: [8] - V3AEUnitS1
+# CHECK-NEXT: [9] - V3AEUnitS2
+# CHECK-NEXT: [10] - V3AEUnitS3
+# CHECK-NEXT: [11] - V3AEUnitS4
+# CHECK-NEXT: [12] - V3AEUnitS5
+# CHECK-NEXT: [13] - V3AEUnitST1
+# CHECK-NEXT: [14] - V3AEUnitV0
+# CHECK-NEXT: [15] - V3AEUnitV1
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.34 - - - - - - - - - 0.50 0.50
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] Instructions:
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.34 - - - - - - - - - - - ld1 { v0.4h }, [sp]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z0.d, z0.d, z0.d
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER .. ld1 { v0.4h }, [sp]
+# CHECK-NEXT: [0,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,0] DeeeeeeE--R. ld1 { v0.4h }, [sp]
+# CHECK-NEXT: [1,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [2,0] DeeeeeeE--R. ld1 { v0.4h }, [sp]
+# CHECK-NEXT: [2,1] D=======eeER add z0.d, z0.d, z0.d
+# CHECK-NEXT: [3,0] D=eeeeeeE--R ld1 { v0.4h }, [sp]
+# CHECK-NEXT: [3,1] D=======eeER add z0.d, z0.d, z0.d
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 4 1.3 1.3 1.5 ld1 { v0.4h }, [sp]
+# CHECK-NEXT: 1. 4 7.5 0.3 0.0 add z0.d, z0.d, z0.d
+# CHECK-NEXT: 4 4.4 0.8 0.8 <total>
+
+# CHECK: [7] Code Region - SIMD64-bit-s
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 200
+# CHECK-NEXT: Total Cycles: 60
+# CHECK-NEXT: Total uOps: 200
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.33
+# CHECK-NEXT: IPC: 3.33
+# CHECK-NEXT: Block RThroughput: 0.5
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 6 0.33 * ld1 { v0.2s }, [sp]
+# CHECK-NEXT: 1 2 0.50 add z0.d, z0.d, z0.d
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - V3AEUnitB
+# CHECK-NEXT: [0.1] - V3AEUnitB
+# CHECK-NEXT: [0.2] - V3AEUnitB
+# CHECK-NEXT: [1.0] - V3AEUnitD
+# CHECK-NEXT: [1.1] - V3AEUnitD
+# CHECK-NEXT: [2.0] - V3AEUnitFlg
+# CHECK-NEXT: [2.1] - V3AEUnitFlg
+# CHECK-NEXT: [2.2] - V3AEUnitFlg
+# CHECK-NEXT: [2.3] - V3AEUnitFlg
+# CHECK-NEXT: [3.0] - V3AEUnitL12
+# CHECK-NEXT: [3.1] - V3AEUnitL12
+# CHECK-NEXT: [4] - V3AEUnitLS0
+# CHECK-NEXT: [5] - V3AEUnitM0
+# CHECK-NEXT: [6] - V3AEUnitM1
+# CHECK-NEXT: [7] - V3AEUnitS0
+# CHECK-NEXT: [8] - V3AEUnitS1
+# CHECK-NEXT: [9] - V3AEUnitS2
+# CHECK-NEXT: [10] - V3AEUnitS3
+# CHECK-NEXT: [11] - V3AEUnitS4
+# CHECK-NEXT: [12] - V3AEUnitS5
+# CHECK-NEXT: [13] - V3AEUnitST1
+# CHECK-NEXT: [14] - V3AEUnitV0
+# CHECK-NEXT: [15] - V3AEUnitV1
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.34 - - - - - - - - - 0.50 0.50
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] Instructions:
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.34 - - - - - - - - - - - ld1 { v0.2s }, [sp]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z0.d, z0.d, z0.d
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER .. ld1 { v0.2s }, [sp]
+# CHECK-NEXT: [0,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,0] DeeeeeeE--R. ld1 { v0.2s }, [sp]
+# CHECK-NEXT: [1,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [2,0] DeeeeeeE--R. ld1 { v0.2s }, [sp]
+# CHECK-NEXT: [2,1] D=======eeER add z0.d, z0.d, z0.d
+# CHECK-NEXT: [3,0] D=eeeeeeE--R ld1 { v0.2s }, [sp]
+# CHECK-NEXT: [3,1] D=======eeER add z0.d, z0.d, z0.d
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 4 1.3 1.3 1.5 ld1 { v0.2s }, [sp]
+# CHECK-NEXT: 1. 4 7.5 0.3 0.0 add z0.d, z0.d, z0.d
+# CHECK-NEXT: 4 4.4 0.8 0.8 <total>
+
+# CHECK: [8] Code Region - SIMD64-bit-d
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 200
+# CHECK-NEXT: Total Cycles: 60
+# CHECK-NEXT: Total uOps: 200
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.33
+# CHECK-NEXT: IPC: 3.33
+# CHECK-NEXT: Block RThroughput: 0.5
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 6 0.33 * ld1 { v0.1d }, [sp]
+# CHECK-NEXT: 1 2 0.50 add z0.d, z0.d, z0.d
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - V3AEUnitB
+# CHECK-NEXT: [0.1] - V3AEUnitB
+# CHECK-NEXT: [0.2] - V3AEUnitB
+# CHECK-NEXT: [1.0] - V3AEUnitD
+# CHECK-NEXT: [1.1] - V3AEUnitD
+# CHECK-NEXT: [2.0] - V3AEUnitFlg
+# CHECK-NEXT: [2.1] - V3AEUnitFlg
+# CHECK-NEXT: [2.2] - V3AEUnitFlg
+# CHECK-NEXT: [2.3] - V3AEUnitFlg
+# CHECK-NEXT: [3.0] - V3AEUnitL12
+# CHECK-NEXT: [3.1] - V3AEUnitL12
+# CHECK-NEXT: [4] - V3AEUnitLS0
+# CHECK-NEXT: [5] - V3AEUnitM0
+# CHECK-NEXT: [6] - V3AEUnitM1
+# CHECK-NEXT: [7] - V3AEUnitS0
+# CHECK-NEXT: [8] - V3AEUnitS1
+# CHECK-NEXT: [9] - V3AEUnitS2
+# CHECK-NEXT: [10] - V3AEUnitS3
+# CHECK-NEXT: [11] - V3AEUnitS4
+# CHECK-NEXT: [12] - V3AEUnitS5
+# CHECK-NEXT: [13] - V3AEUnitST1
+# CHECK-NEXT: [14] - V3AEUnitV0
+# CHECK-NEXT: [15] - V3AEUnitV1
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.34 - - - - - - - - - 0.50 0.50
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] Instructions:
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.34 - - - - - - - - - - - ld1 { v0.1d }, [sp]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z0.d, z0.d, z0.d
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER .. ld1 { v0.1d }, [sp]
+# CHECK-NEXT: [0,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,0] DeeeeeeE--R. ld1 { v0.1d }, [sp]
+# CHECK-NEXT: [1,1] D======eeER. add z0.d, z0.d, z0.d
+# CHECK-NEXT: [2,0] DeeeeeeE--R. ld1 { v0.1d }, [sp]
+# CHECK-NEXT: [2,1] D=======eeER add z0.d, z0.d, z0.d
+# CHECK-NEXT: [3,0] D=eeeeeeE--R ld1 { v0.1d }, [sp]
+# CHECK-NEXT: [3,1] D=======eeER add z0.d, z0.d, z0.d
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 4 1.3 1.3 1.5 ld1 { v0.1d }, [sp]
+# CHECK-NEXT: 1. 4 7.5 0.3 0.0 add z0.d, z0.d, z0.d
+# CHECK-NEXT: 4 4.4 0.8 0.8 <total>
+
+# CHECK: [9] Code Region - insr
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 200
+# CHECK-NEXT: Total Cycles: 703
+# CHECK-NEXT: Total uOps: 300
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.43
+# CHECK-NEXT: IPC: 0.28
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 2 5 1.00 insr z0.s, w0
+# CHECK-NEXT: 1 2 0.50 add z0.s, z0.s, z0.s
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - V3AEUnitB
+# CHECK-NEXT: [0.1] - V3AEUnitB
+# CHECK-NEXT: [0.2] - V3AEUnitB
+# CHECK-NEXT: [1.0] - V3AEUnitD
+# CHECK-NEXT: [1.1] - V3AEUnitD
+# CHECK-NEXT: [2.0] - V3AEUnitFlg
+# CHECK-NEXT: [2.1] - V3AEUnitFlg
+# CHECK-NEXT: [2.2] - V3AEUnitFlg
+# CHECK-NEXT: [2.3] - V3AEUnitFlg
+# CHECK-NEXT: [3.0] - V3AEUnitL12
+# CHECK-NEXT: [3.1] - V3AEUnitL12
+# CHECK-NEXT: [4] - V3AEUnitLS0
+# CHECK-NEXT: [5] - V3AEUnitM0
+# CHECK-NEXT: [6] - V3AEUnitM1
+# CHECK-NEXT: [7] - V3AEUnitS0
+# CHECK-NEXT: [8] - V3AEUnitS1
+# CHECK-NEXT: [9] - V3AEUnitS2
+# CHECK-NEXT: [10] - V3AEUnitS3
+# CHECK-NEXT: [11] - V3AEUnitS4
+# CHECK-NEXT: [12] - V3AEUnitS5
+# CHECK-NEXT: [13] - V3AEUnitST1
+# CHECK-NEXT: [14] - V3AEUnitV0
+# CHECK-NEXT: [15] - V3AEUnitV1
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15]
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] Instructions:
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - 1.00 insr z0.s, w0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - add z0.s, z0.s, z0.s
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeER . . . . . insr z0.s, w0
+# CHECK-NEXT: [0,1] D=====eeER. . . . . add z0.s, z0.s, z0.s
+# CHECK-NEXT: [1,0] D=======eeeeeER. . . . insr z0.s, w0
+# CHECK-NEXT: [1,1] D============eeER . . . add z0.s, z0.s, z0.s
+# CHECK-NEXT: [2,0] D==============eeeeeER . . insr z0.s, w0
+# CHECK-NEXT: [2,1] D===================eeER . . add z0.s, z0.s, z0.s
+# CHECK-NEXT: [3,0] .D====================eeeeeER . insr z0.s, w0
+# CHECK-NEXT: [3,1] .D=========================eeER add z0.s, z0.s, z0.s
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 4 11.3 0.3 0.0 insr z0.s, w0
+# CHECK-NEXT: 1. 4 16.3 0.0 0.0 add z0.s, z0.s, z0.s
+# CHECK-NEXT: 4 13.8 0.1 0.0 <total>
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-forwarding.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-forwarding.s
new file mode 100644
index 0000000..1e97750
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-forwarding.s
@@ -0,0 +1,1960 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=neoverse-v3ae -mattr=+sve2-aes,+sve2-sha3,+sve2-sm4 --instruction-info=0 --resource-pressure=0 --timeline --timeline-max-iterations=2 < %s | FileCheck %s
+
+# LLVM-MCA-BEGIN madd
+mul x0, x0, x0
+madd x0, x1, x2, x0
+madd x0, x1, x2, x0
+madd x0, x0, x0, x0
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN smaddl
+mul x0, x0, x0
+smaddl x0, w1, w2, x0
+smaddl x0, w1, w2, x0
+smaddl x0, w0, w0, x0
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN fmadd
+fadd d0, d0, d0
+fmadd d0, d1, d2, d0
+fmul d0, d0, d0
+fmadd d0, d1, d2, d0
+fmadd d0, d1, d2, d0
+fmadd d0, d0, d1, d2
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN saba
+mul v0.4s, v0.4s, v0.4s
+saba v0.4s, v1.4s, v2.4s
+saba v0.4s, v1.4s, v2.4s
+saba v0.4s, v0.4s, v1.4s
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN sdot
+mul v0.4s, v0.4s, v0.4s
+sdot v0.4s, v1.16b, v2.16b
+sdot v0.4s, v1.16b, v2.16b
+sdot v0.4s, v0.16b, v1.16b
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN smmla
+mul v0.4s, v0.4s, v0.4s
+smmla v0.4s, v1.16b, v2.16b
+smmla v0.4s, v1.16b, v2.16b
+smmla v0.4s, v0.16b, v1.16b
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN mla
+mul v0.4s, v0.4s, v0.4s
+mla v0.4s, v1.4s, v2.4s
+mla v0.4s, v1.4s, v2.4s
+mla v0.4s, v0.4s, v1.4s
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN sqrdmlah
+mul v0.4s, v0.4s, v0.4s
+sqrdmlah v0.4s, v1.4s, v2.4s
+sqrdmlah v0.4s, v1.4s, v2.4s
+sqrdmlah v0.4s, v0.4s, v1.4s
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN smlal2
+mul v0.4s, v0.4s, v0.4s
+smlal2 v0.4s, v1.8h, v2.8h
+smlal2 v0.4s, v1.8h, v2.8h
+smlal2 v0.4s, v0.8h, v1.8h
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN sadalp
+mul v0.4s, v0.4s, v0.4s
+sadalp v0.2d, v1.4s
+sadalp v0.2d, v1.4s
+sadalp v0.2d, v0.4s
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN ssra
+mul v0.4s, v0.4s, v0.4s
+ssra v0.2d, v1.2d, #1
+ssra v0.2d, v1.2d, #1
+ssra v0.2d, v0.2d, #1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN fcmla
+fmul v0.4s, v0.4s, v0.4s
+fcmla v0.2d, v1.2d, v2.2d, #90
+fcmla v0.2d, v1.2d, v2.2d, #90
+fcmla v0.2d, v0.2d, v1.2d, #90
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN fmla
+fmul v0.2d, v0.2d, v0.2d
+fmla v0.2d, v1.2d, v2.2d
+fadd v0.2d, v0.2d, v0.2d
+fmla v0.2d, v1.2d, v2.2d
+fmla v0.2d, v1.2d, v2.2d
+fmla v0.2d, v0.2d, v1.2d
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN fmlal
+fmul v0.2d, v0.2d, v0.2d
+fmlal v0.4s, v1.4h, v2.4h
+fadd v0.2d, v0.2d, v0.2d
+fmlal v0.4s, v1.4h, v2.4h
+fmlal v0.4s, v1.4h, v2.4h
+fmlal v0.4s, v0.4h, v1.4h
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN bfdot
+fmul v0.2d, v0.2d, v0.2d
+bfdot v0.4s, v1.8h, v2.8h
+bfdot v0.4s, v1.8h, v2.8h
+bfdot v0.4s, v0.8h, v1.8h
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN bfmmla
+fmul v0.2d, v0.2d, v0.2d
+bfmmla v0.4s, v1.8h, v2.8h
+bfmmla v0.4s, v1.8h, v2.8h
+bfmmla v0.4s, v0.8h, v1.8h
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN bfmlalb
+fmul v0.2d, v0.2d, v0.2d
+bfmlalb v0.4s, v1.8h, v2.8h
+bfmlalb v0.4s, v1.8h, v2.8h
+bfmlalb v0.4s, v0.8h, v1.8h
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN crc32b
+mul w0, w0, w0
+crc32b w0, w0, w1
+crc32b w0, w0, w1
+crc32b w0, w0, w0
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z saba
+mul z0.d, z0.d, z0.d
+saba z0.d, z1.d, z2.d
+saba z0.d, z1.d, z2.d
+saba z0.d, z0.d, z1.d
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z sadalp
+mul z0.d, z0.d, z0.d
+sadalp z0.d, p0/m, z1.s
+sadalp z0.d, p0/m, z1.s
+sadalp z0.d, p0/m, z0.s
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z ssra
+mul z0.d, z0.d, z0.d
+ssra z0.d, z1.d, #1
+ssra z0.d, z1.d, #1
+ssra z0.d, z0.d, #1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z cdot.s
+mul z0.d, z0.d, z0.d
+cdot z0.s, z1.b, z2.b, #90
+cdot z0.s, z1.b, z2.b, #90
+cdot z0.s, z0.b, z1.b, #90
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z cdot.d
+mul z0.d, z0.d, z0.d
+cdot z0.d, z1.h, z2.h, #90
+cdot z0.d, z1.h, z2.h, #90
+cdot z0.d, z0.h, z1.h, #90
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z cmla.b
+mul z0.d, z0.d, z0.d
+cmla z0.b, z1.b, z2.b, #90
+cmla z0.b, z1.b, z2.b, #90
+cmla z0.b, z0.b, z1.b, #90
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z cmla.d
+mul z0.d, z0.d, z0.d
+cmla z0.d, z1.d, z2.d, #90
+cmla z0.d, z1.d, z2.d, #90
+cmla z0.d, z0.d, z1.d, #90
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z sdot.s
+mul z0.d, z0.d, z0.d
+sdot z0.s, z1.b, z2.b
+sdot z0.s, z1.b, z2.b
+sdot z0.s, z0.b, z1.b
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z sudot
+mul z0.d, z0.d, z0.d
+sdot z0.s, z1.b, z2.b[1]
+sdot z0.s, z1.b, z2.b[1]
+sdot z0.s, z0.b, z1.b[1]
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z sdot.d
+mul z0.d, z0.d, z0.d
+sdot z0.d, z1.h, z2.h
+sdot z0.d, z1.h, z2.h
+sdot z0.d, z0.h, z1.h
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z smmla
+mul z0.s, z0.s, z0.s
+smmla z0.s, z1.b, z2.b
+smmla z0.s, z1.b, z2.b
+smmla z0.s, z0.b, z1.b
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z mla.b
+mul z0.d, z0.d, z0.d
+mla z0.b, p0/m, z1.b, z2.b
+mla z0.b, p0/m, z1.b, z2.b
+mla z0.b, p0/m, z0.b, z1.b
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z mla.d
+mul z0.d, z0.d, z0.d
+mla z0.d, p0/m, z1.d, z2.d
+mla z0.d, p0/m, z1.d, z2.d
+mla z0.d, p0/m, z0.d, z1.d
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z smlalb
+mul z0.d, z0.d, z0.d
+smlalb z0.d, z1.s, z2.s
+smlalb z0.d, z1.s, z2.s
+smlalb z0.d, z0.s, z1.s
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z sqdmlalb
+mul z0.d, z0.d, z0.d
+sqdmlalb z0.d, z1.s, z2.s
+sqdmlalb z0.d, z1.s, z2.s
+sqdmlalb z0.d, z0.s, z1.s
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z sqrdmlah.b
+mul z0.d, z0.d, z0.d
+sqrdmlah z0.b, z1.b, z2.b
+sqrdmlah z0.b, z1.b, z2.b
+sqrdmlah z0.b, z0.b, z1.b
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z sqrdmlah.d
+mul z0.d, z0.d, z0.d
+sqrdmlah z0.d, z1.d, z2.d
+sqrdmlah z0.d, z1.d, z2.d
+sqrdmlah z0.d, z0.d, z1.d
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z fcmla ZPmZZ
+fmul z0.d, z0.d, z0.d
+fcmla z0.d, p0/m, z1.d, z2.d, 90
+fcmla z0.d, p0/m, z1.d, z2.d, 90
+fcmla z0.d, p0/m, z0.d, z1.d, 90
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z fcmla ZZZI
+fmul z0.d, z0.d, z0.d
+fcmla z0.s, z1.s, z2.s[1], 90
+fcmla z0.s, z1.s, z2.s[1], 90
+fcmla z0.s, z0.s, z1.s[1], 90
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z fmla ZPmZZ
+fmul z0.d, z0.d, z0.d
+fmla z0.d, p0/m, z1.d, z2.d
+fmla z0.d, p0/m, z1.d, z2.d
+fmla z0.d, p0/m, z0.d, z1.d
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z fmla ZZZI
+fmul z0.d, z0.d, z0.d
+fmla z0.d, z1.d, z2.d[1]
+fmla z0.d, z1.d, z2.d[1]
+fmla z0.d, z0.d, z1.d[1]
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z fmlalb ZZZ
+fmul z0.d, z0.d, z0.d
+fmlalb z0.s, z1.h, z2.h
+fmlalb z0.s, z1.h, z2.h
+fmlalb z0.s, z0.h, z1.h
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z bfdot
+fmul z0.d, z0.d, z0.d
+bfdot z0.s, z1.h, z2.h
+bfdot z0.s, z1.h, z2.h
+bfdot z0.s, z0.h, z1.h
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN Z bfmmla
+fmul z0.d, z0.d, z0.d
+bfmmla z0.s, z1.h, z2.h
+bfmmla z0.s, z1.h, z2.h
+bfmmla z0.s, z0.h, z1.h
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN bfmlalb
+fmul z0.d, z0.d, z0.d
+bfmlalb z0.s, z1.h, z2.h
+bfmlalb z0.s, z1.h, z2.h
+bfmlalb z0.s, z0.h, z1.h
+# LLVM-MCA-END
+
+# CHECK: [0] Code Region - madd
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 205
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.95
+# CHECK-NEXT: IPC: 1.95
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeeER. . mul x0, x0, x0
+# CHECK-NEXT: [0,1] D=eeER . madd x0, x1, x2, x0
+# CHECK-NEXT: [0,2] DeeE-R . madd x0, x1, x2, x0
+# CHECK-NEXT: [0,3] D==eeER . madd x0, x0, x0, x0
+# CHECK-NEXT: [1,0] D====eeER mul x0, x0, x0
+# CHECK-NEXT: [1,1] D==eeE--R madd x0, x1, x2, x0
+# CHECK-NEXT: [1,2] D=eeE---R madd x0, x1, x2, x0
+# CHECK-NEXT: [1,3] D===eeE-R madd x0, x0, x0, x0
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 3.0 0.5 0.0 mul x0, x0, x0
+# CHECK-NEXT: 1. 2 2.5 2.5 1.0 madd x0, x1, x2, x0
+# CHECK-NEXT: 2. 2 1.5 1.5 2.0 madd x0, x1, x2, x0
+# CHECK-NEXT: 3. 2 3.5 0.0 0.5 madd x0, x0, x0, x0
+# CHECK-NEXT: 2 2.6 1.1 0.9 <total>
+
+# CHECK: [1] Code Region - smaddl
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 803
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012345678
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . . mul x0, x0, x0
+# CHECK-NEXT: [0,1] D==eeER . . . smaddl x0, w1, w2, x0
+# CHECK-NEXT: [0,2] D====eeER . . . smaddl x0, w1, w2, x0
+# CHECK-NEXT: [0,3] D======eeER . . smaddl x0, w0, w0, x0
+# CHECK-NEXT: [1,0] D========eeER . . mul x0, x0, x0
+# CHECK-NEXT: [1,1] D==========eeER. . smaddl x0, w1, w2, x0
+# CHECK-NEXT: [1,2] D============eeER . smaddl x0, w1, w2, x0
+# CHECK-NEXT: [1,3] D==============eeER smaddl x0, w0, w0, x0
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 5.0 0.5 0.0 mul x0, x0, x0
+# CHECK-NEXT: 1. 2 7.0 0.0 0.0 smaddl x0, w1, w2, x0
+# CHECK-NEXT: 2. 2 9.0 0.0 0.0 smaddl x0, w1, w2, x0
+# CHECK-NEXT: 3. 2 11.0 0.0 0.0 smaddl x0, w0, w0, x0
+# CHECK-NEXT: 2 8.0 0.1 0.0 <total>
+
+# CHECK: [2] Code Region - fmadd
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 600
+# CHECK-NEXT: Total Cycles: 1703
+# CHECK-NEXT: Total uOps: 600
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.35
+# CHECK-NEXT: IPC: 0.35
+# CHECK-NEXT: Block RThroughput: 3.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeER. . . . . . .. fadd d0, d0, d0
+# CHECK-NEXT: [0,1] D==eeeeER . . . . . .. fmadd d0, d1, d2, d0
+# CHECK-NEXT: [0,2] D======eeeER . . . . .. fmul d0, d0, d0
+# CHECK-NEXT: [0,3] D=======eeeeER . . . . .. fmadd d0, d1, d2, d0
+# CHECK-NEXT: [0,4] D=========eeeeER . . . .. fmadd d0, d1, d2, d0
+# CHECK-NEXT: [0,5] D=============eeeeER. . . .. fmadd d0, d0, d1, d2
+# CHECK-NEXT: [1,0] D=================eeER . . .. fadd d0, d0, d0
+# CHECK-NEXT: [1,1] D===================eeeeER . .. fmadd d0, d1, d2, d0
+# CHECK-NEXT: [1,2] D=======================eeeER . .. fmul d0, d0, d0
+# CHECK-NEXT: [1,3] D========================eeeeER .. fmadd d0, d1, d2, d0
+# CHECK-NEXT: [1,4] .D=========================eeeeER .. fmadd d0, d1, d2, d0
+# CHECK-NEXT: [1,5] .D=============================eeeeER fmadd d0, d0, d1, d2
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 9.5 0.5 0.0 fadd d0, d0, d0
+# CHECK-NEXT: 1. 2 11.5 0.0 0.0 fmadd d0, d1, d2, d0
+# CHECK-NEXT: 2. 2 15.5 0.0 0.0 fmul d0, d0, d0
+# CHECK-NEXT: 3. 2 16.5 0.0 0.0 fmadd d0, d1, d2, d0
+# CHECK-NEXT: 4. 2 18.0 0.0 0.0 fmadd d0, d1, d2, d0
+# CHECK-NEXT: 5. 2 22.0 0.0 0.0 fmadd d0, d0, d1, d2
+# CHECK-NEXT: 2 15.5 0.1 0.0 <total>
+
+# CHECK: [3] Code Region - saba
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1303
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.31
+# CHECK-NEXT: IPC: 0.31
+# CHECK-NEXT: Block RThroughput: 1.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeeeeER . . . . . mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: [0,1] D====eeeeER . . . . saba v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: [0,2] D=====eeeeER . . . . saba v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: [0,3] D=========eeeeER . . . saba v0.4s, v0.4s, v1.4s
+# CHECK-NEXT: [1,0] D=============eeeeER. . . mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: [1,1] D=================eeeeER . . saba v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: [1,2] D==================eeeeER. . saba v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: [1,3] D======================eeeeER saba v0.4s, v0.4s, v1.4s
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 7.5 0.5 0.0 mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1. 2 11.5 0.0 0.0 saba v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: 2. 2 12.5 0.0 0.0 saba v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: 3. 2 16.5 0.0 0.0 saba v0.4s, v0.4s, v1.4s
+# CHECK-NEXT: 2 12.0 0.1 0.0 <total>
+
+# CHECK: [4] Code Region - sdot
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1103
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.36
+# CHECK-NEXT: IPC: 0.36
+# CHECK-NEXT: Block RThroughput: 1.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234
+
+# CHECK: [0,0] DeeeeER . . . . mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: [0,1] D====eeeER. . . . sdot v0.4s, v1.16b, v2.16b
+# CHECK-NEXT: [0,2] D=====eeeER . . . sdot v0.4s, v1.16b, v2.16b
+# CHECK-NEXT: [0,3] D========eeeER . . . sdot v0.4s, v0.16b, v1.16b
+# CHECK-NEXT: [1,0] D===========eeeeER . . mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: [1,1] D===============eeeER . sdot v0.4s, v1.16b, v2.16b
+# CHECK-NEXT: [1,2] D================eeeER . sdot v0.4s, v1.16b, v2.16b
+# CHECK-NEXT: [1,3] D===================eeeER sdot v0.4s, v0.16b, v1.16b
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 6.5 0.5 0.0 mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1. 2 10.5 0.0 0.0 sdot v0.4s, v1.16b, v2.16b
+# CHECK-NEXT: 2. 2 11.5 0.0 0.0 sdot v0.4s, v1.16b, v2.16b
+# CHECK-NEXT: 3. 2 14.5 0.0 0.0 sdot v0.4s, v0.16b, v1.16b
+# CHECK-NEXT: 2 10.8 0.1 0.0 <total>
+
+# CHECK: [5] Code Region - smmla
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1103
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.36
+# CHECK-NEXT: IPC: 0.36
+# CHECK-NEXT: Block RThroughput: 1.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234
+
+# CHECK: [0,0] DeeeeER . . . . mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: [0,1] D====eeeER. . . . smmla v0.4s, v1.16b, v2.16b
+# CHECK-NEXT: [0,2] D=====eeeER . . . smmla v0.4s, v1.16b, v2.16b
+# CHECK-NEXT: [0,3] D========eeeER . . . smmla v0.4s, v0.16b, v1.16b
+# CHECK-NEXT: [1,0] D===========eeeeER . . mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: [1,1] D===============eeeER . smmla v0.4s, v1.16b, v2.16b
+# CHECK-NEXT: [1,2] D================eeeER . smmla v0.4s, v1.16b, v2.16b
+# CHECK-NEXT: [1,3] D===================eeeER smmla v0.4s, v0.16b, v1.16b
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 6.5 0.5 0.0 mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1. 2 10.5 0.0 0.0 smmla v0.4s, v1.16b, v2.16b
+# CHECK-NEXT: 2. 2 11.5 0.0 0.0 smmla v0.4s, v1.16b, v2.16b
+# CHECK-NEXT: 3. 2 14.5 0.0 0.0 smmla v0.4s, v0.16b, v1.16b
+# CHECK-NEXT: 2 10.8 0.1 0.0 <total>
+
+# CHECK: [6] Code Region - mla
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1303
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.31
+# CHECK-NEXT: IPC: 0.31
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeeeeER . . . . . mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: [0,1] D====eeeeER . . . . mla v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: [0,2] D=====eeeeER . . . . mla v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: [0,3] D=========eeeeER . . . mla v0.4s, v0.4s, v1.4s
+# CHECK-NEXT: [1,0] D=============eeeeER. . . mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: [1,1] D=================eeeeER . . mla v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: [1,2] D==================eeeeER. . mla v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: [1,3] D======================eeeeER mla v0.4s, v0.4s, v1.4s
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 7.5 0.5 0.0 mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1. 2 11.5 0.0 0.0 mla v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: 2. 2 12.5 0.0 0.0 mla v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: 3. 2 16.5 0.0 0.0 mla v0.4s, v0.4s, v1.4s
+# CHECK-NEXT: 2 12.0 0.1 0.0 <total>
+
+# CHECK: [7] Code Region - sqrdmlah
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1403
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.29
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 7.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeER . . . . . mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: [0,1] D====eeeeER . . . . sqrdmlah v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: [0,2] D======eeeeER . . . . sqrdmlah v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: [0,3] D==========eeeeER . . . sqrdmlah v0.4s, v0.4s, v1.4s
+# CHECK-NEXT: [1,0] D==============eeeeER . . mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: [1,1] D==================eeeeER. . sqrdmlah v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: [1,2] D====================eeeeER . sqrdmlah v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: [1,3] D========================eeeeER sqrdmlah v0.4s, v0.4s, v1.4s
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 8.0 0.5 0.0 mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1. 2 12.0 0.0 0.0 sqrdmlah v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: 2. 2 14.0 0.0 0.0 sqrdmlah v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: 3. 2 18.0 0.0 0.0 sqrdmlah v0.4s, v0.4s, v1.4s
+# CHECK-NEXT: 2 13.0 0.1 0.0 <total>
+
+# CHECK: [8] Code Region - smlal2
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1303
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.31
+# CHECK-NEXT: IPC: 0.31
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeeeeER . . . . . mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: [0,1] D====eeeeER . . . . smlal2 v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: [0,2] D=====eeeeER . . . . smlal2 v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: [0,3] D=========eeeeER . . . smlal2 v0.4s, v0.8h, v1.8h
+# CHECK-NEXT: [1,0] D=============eeeeER. . . mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: [1,1] D=================eeeeER . . smlal2 v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: [1,2] D==================eeeeER. . smlal2 v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: [1,3] D======================eeeeER smlal2 v0.4s, v0.8h, v1.8h
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 7.5 0.5 0.0 mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1. 2 11.5 0.0 0.0 smlal2 v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: 2. 2 12.5 0.0 0.0 smlal2 v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: 3. 2 16.5 0.0 0.0 smlal2 v0.4s, v0.8h, v1.8h
+# CHECK-NEXT: 2 12.0 0.1 0.0 <total>
+
+# CHECK: [9] Code Region - sadalp
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1303
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.31
+# CHECK-NEXT: IPC: 0.31
+# CHECK-NEXT: Block RThroughput: 1.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeeeeER . . . . . mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: [0,1] D====eeeeER . . . . sadalp v0.2d, v1.4s
+# CHECK-NEXT: [0,2] D=====eeeeER . . . . sadalp v0.2d, v1.4s
+# CHECK-NEXT: [0,3] D=========eeeeER . . . sadalp v0.2d, v0.4s
+# CHECK-NEXT: [1,0] D=============eeeeER. . . mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: [1,1] D=================eeeeER . . sadalp v0.2d, v1.4s
+# CHECK-NEXT: [1,2] D==================eeeeER. . sadalp v0.2d, v1.4s
+# CHECK-NEXT: [1,3] D======================eeeeER sadalp v0.2d, v0.4s
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 7.5 0.5 0.0 mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1. 2 11.5 0.0 0.0 sadalp v0.2d, v1.4s
+# CHECK-NEXT: 2. 2 12.5 0.0 0.0 sadalp v0.2d, v1.4s
+# CHECK-NEXT: 3. 2 16.5 0.0 0.0 sadalp v0.2d, v0.4s
+# CHECK-NEXT: 2 12.0 0.1 0.0 <total>
+
+# CHECK: [10] Code Region - ssra
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1303
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.31
+# CHECK-NEXT: IPC: 0.31
+# CHECK-NEXT: Block RThroughput: 1.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeeeeER . . . . . mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: [0,1] D====eeeeER . . . . ssra v0.2d, v1.2d, #1
+# CHECK-NEXT: [0,2] D=====eeeeER . . . . ssra v0.2d, v1.2d, #1
+# CHECK-NEXT: [0,3] D=========eeeeER . . . ssra v0.2d, v0.2d, #1
+# CHECK-NEXT: [1,0] D=============eeeeER. . . mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: [1,1] D=================eeeeER . . ssra v0.2d, v1.2d, #1
+# CHECK-NEXT: [1,2] D==================eeeeER. . ssra v0.2d, v1.2d, #1
+# CHECK-NEXT: [1,3] D======================eeeeER ssra v0.2d, v0.2d, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 7.5 0.5 0.0 mul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1. 2 11.5 0.0 0.0 ssra v0.2d, v1.2d, #1
+# CHECK-NEXT: 2. 2 12.5 0.0 0.0 ssra v0.2d, v1.2d, #1
+# CHECK-NEXT: 3. 2 16.5 0.0 0.0 ssra v0.2d, v0.2d, #1
+# CHECK-NEXT: 2 12.0 0.1 0.0 <total>
+
+# CHECK: [11] Code Region - fcmla
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1303
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.31
+# CHECK-NEXT: IPC: 0.31
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeeeER . . . . . fmul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: [0,1] D===eeeeER. . . . . fcmla v0.2d, v1.2d, v2.2d, #90
+# CHECK-NEXT: [0,2] D=====eeeeER . . . . fcmla v0.2d, v1.2d, v2.2d, #90
+# CHECK-NEXT: [0,3] D=========eeeeER . . . fcmla v0.2d, v0.2d, v1.2d, #90
+# CHECK-NEXT: [1,0] D=============eeeER . . . fmul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: [1,1] D================eeeeER . . fcmla v0.2d, v1.2d, v2.2d, #90
+# CHECK-NEXT: [1,2] D==================eeeeER. . fcmla v0.2d, v1.2d, v2.2d, #90
+# CHECK-NEXT: [1,3] D======================eeeeER fcmla v0.2d, v0.2d, v1.2d, #90
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 7.5 0.5 0.0 fmul v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1. 2 10.5 0.0 0.0 fcmla v0.2d, v1.2d, v2.2d, #90
+# CHECK-NEXT: 2. 2 12.5 0.0 0.0 fcmla v0.2d, v1.2d, v2.2d, #90
+# CHECK-NEXT: 3. 2 16.5 0.0 0.0 fcmla v0.2d, v0.2d, v1.2d, #90
+# CHECK-NEXT: 2 11.8 0.1 0.0 <total>
+
+# CHECK: [12] Code Region - fmla
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 600
+# CHECK-NEXT: Total Cycles: 1703
+# CHECK-NEXT: Total uOps: 600
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.35
+# CHECK-NEXT: IPC: 0.35
+# CHECK-NEXT: Block RThroughput: 3.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeER . . . . . .. fmul v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: [0,1] D=eeeeER . . . . . .. fmla v0.2d, v1.2d, v2.2d
+# CHECK-NEXT: [0,2] D=====eeER. . . . . .. fadd v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: [0,3] D=======eeeeER . . . . .. fmla v0.2d, v1.2d, v2.2d
+# CHECK-NEXT: [0,4] D=========eeeeER . . . .. fmla v0.2d, v1.2d, v2.2d
+# CHECK-NEXT: [0,5] D=============eeeeER. . . .. fmla v0.2d, v0.2d, v1.2d
+# CHECK-NEXT: [1,0] D=================eeeER . . .. fmul v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: [1,1] D==================eeeeER. . .. fmla v0.2d, v1.2d, v2.2d
+# CHECK-NEXT: [1,2] D======================eeER . .. fadd v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: [1,3] D========================eeeeER .. fmla v0.2d, v1.2d, v2.2d
+# CHECK-NEXT: [1,4] .D=========================eeeeER .. fmla v0.2d, v1.2d, v2.2d
+# CHECK-NEXT: [1,5] .D=============================eeeeER fmla v0.2d, v0.2d, v1.2d
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 9.5 0.5 0.0 fmul v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1. 2 10.5 0.0 0.0 fmla v0.2d, v1.2d, v2.2d
+# CHECK-NEXT: 2. 2 14.5 0.0 0.0 fadd v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 3. 2 16.5 0.0 0.0 fmla v0.2d, v1.2d, v2.2d
+# CHECK-NEXT: 4. 2 18.0 0.0 0.0 fmla v0.2d, v1.2d, v2.2d
+# CHECK-NEXT: 5. 2 22.0 0.0 0.0 fmla v0.2d, v0.2d, v1.2d
+# CHECK-NEXT: 2 15.2 0.1 0.0 <total>
+
+# CHECK: [13] Code Region - fmlal
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 600
+# CHECK-NEXT: Total Cycles: 1903
+# CHECK-NEXT: Total uOps: 600
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.32
+# CHECK-NEXT: IPC: 0.32
+# CHECK-NEXT: Block RThroughput: 3.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0
+
+# CHECK: [0,0] DeeeER . . . . . . . fmul v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: [0,1] D===eeeeER. . . . . . . fmlal v0.4s, v1.4h, v2.4h
+# CHECK-NEXT: [0,2] D=======eeER . . . . . . fadd v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: [0,3] D=========eeeeER . . . . . fmlal v0.4s, v1.4h, v2.4h
+# CHECK-NEXT: [0,4] D===========eeeeER . . . . . fmlal v0.4s, v1.4h, v2.4h
+# CHECK-NEXT: [0,5] D===============eeeeER . . . . fmlal v0.4s, v0.4h, v1.4h
+# CHECK-NEXT: [1,0] D===================eeeER. . . . fmul v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: [1,1] D======================eeeeER . . . fmlal v0.4s, v1.4h, v2.4h
+# CHECK-NEXT: [1,2] D==========================eeER . . fadd v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: [1,3] D============================eeeeER. . fmlal v0.4s, v1.4h, v2.4h
+# CHECK-NEXT: [1,4] .D=============================eeeeER . fmlal v0.4s, v1.4h, v2.4h
+# CHECK-NEXT: [1,5] .D=================================eeeeER fmlal v0.4s, v0.4h, v1.4h
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 10.5 0.5 0.0 fmul v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1. 2 13.5 0.0 0.0 fmlal v0.4s, v1.4h, v2.4h
+# CHECK-NEXT: 2. 2 17.5 0.0 0.0 fadd v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 3. 2 19.5 0.0 0.0 fmlal v0.4s, v1.4h, v2.4h
+# CHECK-NEXT: 4. 2 21.0 0.0 0.0 fmlal v0.4s, v1.4h, v2.4h
+# CHECK-NEXT: 5. 2 25.0 0.0 0.0 fmlal v0.4s, v0.4h, v1.4h
+# CHECK-NEXT: 2 17.8 0.1 0.0 <total>
+
+# CHECK: [14] Code Region - bfdot
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1603
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.25
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 01234
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeER . . . . . . fmul v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: [0,1] D===eeeeeER . . . . . bfdot v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: [0,2] D======eeeeeER . . . . . bfdot v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: [0,3] D===========eeeeeER . . . . bfdot v0.4s, v0.8h, v1.8h
+# CHECK-NEXT: [1,0] D================eeeER . . . fmul v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: [1,1] D===================eeeeeER . . bfdot v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: [1,2] D======================eeeeeER. . bfdot v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: [1,3] D===========================eeeeeER bfdot v0.4s, v0.8h, v1.8h
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 9.0 0.5 0.0 fmul v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1. 2 12.0 0.0 0.0 bfdot v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: 2. 2 15.0 0.0 0.0 bfdot v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: 3. 2 20.0 0.0 0.0 bfdot v0.4s, v0.8h, v1.8h
+# CHECK-NEXT: 2 14.0 0.1 0.0 <total>
+
+# CHECK: [15] Code Region - bfmmla
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1903
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.21
+# CHECK-NEXT: IPC: 0.21
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0
+
+# CHECK: [0,0] DeeeER . . . . . . . fmul v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: [0,1] D===eeeeeeER . . . . . . bfmmla v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: [0,2] D=======eeeeeeER . . . . . bfmmla v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: [0,3] D=============eeeeeeER . . . . bfmmla v0.4s, v0.8h, v1.8h
+# CHECK-NEXT: [1,0] D===================eeeER. . . . fmul v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: [1,1] D======================eeeeeeER . . bfmmla v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: [1,2] D==========================eeeeeeER. . bfmmla v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: [1,3] D================================eeeeeeER bfmmla v0.4s, v0.8h, v1.8h
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 10.5 0.5 0.0 fmul v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1. 2 13.5 0.0 0.0 bfmmla v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: 2. 2 17.5 0.0 0.0 bfmmla v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: 3. 2 23.5 0.0 0.0 bfmmla v0.4s, v0.8h, v1.8h
+# CHECK-NEXT: 2 16.3 0.1 0.0 <total>
+
+# CHECK: [16] Code Region - bfmlalb
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1503
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.27
+# CHECK-NEXT: IPC: 0.27
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeER . . . . . . fmul v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: [0,1] D===eeeeeER . . . . . bfmlalb v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: [0,2] D=====eeeeeER . . . . . bfmlalb v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: [0,3] D==========eeeeeER . . . . bfmlalb v0.4s, v0.8h, v1.8h
+# CHECK-NEXT: [1,0] D===============eeeER . . . fmul v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: [1,1] D==================eeeeeER . . bfmlalb v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: [1,2] D====================eeeeeER . . bfmlalb v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: [1,3] D=========================eeeeeER bfmlalb v0.4s, v0.8h, v1.8h
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 8.5 0.5 0.0 fmul v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1. 2 11.5 0.0 0.0 bfmlalb v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: 2. 2 13.5 0.0 0.0 bfmlalb v0.4s, v1.8h, v2.8h
+# CHECK-NEXT: 3. 2 18.5 0.0 0.0 bfmlalb v0.4s, v0.8h, v1.8h
+# CHECK-NEXT: 2 13.0 0.1 0.0 <total>
+
+# CHECK: [17] Code Region - crc32b
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 703
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.57
+# CHECK-NEXT: IPC: 0.57
+# CHECK-NEXT: Block RThroughput: 3.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . .. mul w0, w0, w0
+# CHECK-NEXT: [0,1] D==eeER . .. crc32b w0, w0, w1
+# CHECK-NEXT: [0,2] D===eeER . .. crc32b w0, w0, w1
+# CHECK-NEXT: [0,3] D=====eeER. .. crc32b w0, w0, w0
+# CHECK-NEXT: [1,0] D=======eeER .. mul w0, w0, w0
+# CHECK-NEXT: [1,1] D=========eeER .. crc32b w0, w0, w1
+# CHECK-NEXT: [1,2] D==========eeER.. crc32b w0, w0, w1
+# CHECK-NEXT: [1,3] D============eeER crc32b w0, w0, w0
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 4.5 0.5 0.0 mul w0, w0, w0
+# CHECK-NEXT: 1. 2 6.5 0.0 0.0 crc32b w0, w0, w1
+# CHECK-NEXT: 2. 2 7.5 0.0 0.0 crc32b w0, w0, w1
+# CHECK-NEXT: 3. 2 9.5 0.0 0.0 crc32b w0, w0, w0
+# CHECK-NEXT: 2 7.0 0.1 0.0 <total>
+
+# CHECK: [18] Code Region - Z saba
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1403
+# CHECK-NEXT: Total uOps: 500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.36
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeER . . . . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D=====eeeeER . . . . saba z0.d, z1.d, z2.d
+# CHECK-NEXT: [0,2] D======eeeeER . . . . saba z0.d, z1.d, z2.d
+# CHECK-NEXT: [0,3] D==========eeeeER . . . saba z0.d, z0.d, z1.d
+# CHECK-NEXT: [1,0] D==============eeeeeER . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D===================eeeeER . saba z0.d, z1.d, z2.d
+# CHECK-NEXT: [1,2] D====================eeeeER . saba z0.d, z1.d, z2.d
+# CHECK-NEXT: [1,3] D========================eeeeER saba z0.d, z0.d, z1.d
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 8.0 0.5 0.0 mul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 13.0 0.0 0.0 saba z0.d, z1.d, z2.d
+# CHECK-NEXT: 2. 2 14.0 0.0 0.0 saba z0.d, z1.d, z2.d
+# CHECK-NEXT: 3. 2 18.0 0.0 0.0 saba z0.d, z0.d, z1.d
+# CHECK-NEXT: 2 13.3 0.1 0.0 <total>
+
+# CHECK: [19] Code Region - Z sadalp
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1403
+# CHECK-NEXT: Total uOps: 500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.36
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeER . . . . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D=====eeeeER . . . . sadalp z0.d, p0/m, z1.s
+# CHECK-NEXT: [0,2] D======eeeeER . . . . sadalp z0.d, p0/m, z1.s
+# CHECK-NEXT: [0,3] D==========eeeeER . . . sadalp z0.d, p0/m, z0.s
+# CHECK-NEXT: [1,0] D==============eeeeeER . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D===================eeeeER . sadalp z0.d, p0/m, z1.s
+# CHECK-NEXT: [1,2] D====================eeeeER . sadalp z0.d, p0/m, z1.s
+# CHECK-NEXT: [1,3] D========================eeeeER sadalp z0.d, p0/m, z0.s
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 8.0 0.5 0.0 mul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 13.0 0.0 0.0 sadalp z0.d, p0/m, z1.s
+# CHECK-NEXT: 2. 2 14.0 0.0 0.0 sadalp z0.d, p0/m, z1.s
+# CHECK-NEXT: 3. 2 18.0 0.0 0.0 sadalp z0.d, p0/m, z0.s
+# CHECK-NEXT: 2 13.3 0.1 0.0 <total>
+
+# CHECK: [20] Code Region - Z ssra
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1403
+# CHECK-NEXT: Total uOps: 500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.36
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 3.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeER . . . . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D=====eeeeER . . . . ssra z0.d, z1.d, #1
+# CHECK-NEXT: [0,2] D======eeeeER . . . . ssra z0.d, z1.d, #1
+# CHECK-NEXT: [0,3] D==========eeeeER . . . ssra z0.d, z0.d, #1
+# CHECK-NEXT: [1,0] D==============eeeeeER . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D===================eeeeER . ssra z0.d, z1.d, #1
+# CHECK-NEXT: [1,2] D====================eeeeER . ssra z0.d, z1.d, #1
+# CHECK-NEXT: [1,3] D========================eeeeER ssra z0.d, z0.d, #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 8.0 0.5 0.0 mul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 13.0 0.0 0.0 ssra z0.d, z1.d, #1
+# CHECK-NEXT: 2. 2 14.0 0.0 0.0 ssra z0.d, z1.d, #1
+# CHECK-NEXT: 3. 2 18.0 0.0 0.0 ssra z0.d, z0.d, #1
+# CHECK-NEXT: 2 13.3 0.1 0.0 <total>
+
+# CHECK: [21] Code Region - Z cdot.s
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1203
+# CHECK-NEXT: Total uOps: 500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.42
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456
+
+# CHECK: [0,0] DeeeeeER . . . .. mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D=====eeeER . . .. cdot z0.s, z1.b, z2.b, #90
+# CHECK-NEXT: [0,2] D======eeeER . . .. cdot z0.s, z1.b, z2.b, #90
+# CHECK-NEXT: [0,3] D=========eeeER. . .. cdot z0.s, z0.b, z1.b, #90
+# CHECK-NEXT: [1,0] D============eeeeeER. .. mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D=================eeeER .. cdot z0.s, z1.b, z2.b, #90
+# CHECK-NEXT: [1,2] D==================eeeER .. cdot z0.s, z1.b, z2.b, #90
+# CHECK-NEXT: [1,3] D=====================eeeER cdot z0.s, z0.b, z1.b, #90
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 7.0 0.5 0.0 mul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 12.0 0.0 0.0 cdot z0.s, z1.b, z2.b, #90
+# CHECK-NEXT: 2. 2 13.0 0.0 0.0 cdot z0.s, z1.b, z2.b, #90
+# CHECK-NEXT: 3. 2 16.0 0.0 0.0 cdot z0.s, z0.b, z1.b, #90
+# CHECK-NEXT: 2 12.0 0.1 0.0 <total>
+
+# CHECK: [22] Code Region - Z cdot.d
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1203
+# CHECK-NEXT: Total uOps: 500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.42
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456
+
+# CHECK: [0,0] DeeeeeER . . . .. mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D=====eeeER . . .. cdot z0.d, z1.h, z2.h, #90
+# CHECK-NEXT: [0,2] D======eeeER . . .. cdot z0.d, z1.h, z2.h, #90
+# CHECK-NEXT: [0,3] D=========eeeER. . .. cdot z0.d, z0.h, z1.h, #90
+# CHECK-NEXT: [1,0] D============eeeeeER. .. mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D=================eeeER .. cdot z0.d, z1.h, z2.h, #90
+# CHECK-NEXT: [1,2] D==================eeeER .. cdot z0.d, z1.h, z2.h, #90
+# CHECK-NEXT: [1,3] D=====================eeeER cdot z0.d, z0.h, z1.h, #90
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 7.0 0.5 0.0 mul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 12.0 0.0 0.0 cdot z0.d, z1.h, z2.h, #90
+# CHECK-NEXT: 2. 2 13.0 0.0 0.0 cdot z0.d, z1.h, z2.h, #90
+# CHECK-NEXT: 3. 2 16.0 0.0 0.0 cdot z0.d, z0.h, z1.h, #90
+# CHECK-NEXT: 2 12.0 0.1 0.0 <total>
+
+# CHECK: [23] Code Region - Z cmla.b
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1403
+# CHECK-NEXT: Total uOps: 500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.36
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeER . . . . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D=====eeeeER . . . . cmla z0.b, z1.b, z2.b, #90
+# CHECK-NEXT: [0,2] D======eeeeER . . . . cmla z0.b, z1.b, z2.b, #90
+# CHECK-NEXT: [0,3] D==========eeeeER . . . cmla z0.b, z0.b, z1.b, #90
+# CHECK-NEXT: [1,0] D==============eeeeeER . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D===================eeeeER . cmla z0.b, z1.b, z2.b, #90
+# CHECK-NEXT: [1,2] D====================eeeeER . cmla z0.b, z1.b, z2.b, #90
+# CHECK-NEXT: [1,3] D========================eeeeER cmla z0.b, z0.b, z1.b, #90
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 8.0 0.5 0.0 mul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 13.0 0.0 0.0 cmla z0.b, z1.b, z2.b, #90
+# CHECK-NEXT: 2. 2 14.0 0.0 0.0 cmla z0.b, z1.b, z2.b, #90
+# CHECK-NEXT: 3. 2 18.0 0.0 0.0 cmla z0.b, z0.b, z1.b, #90
+# CHECK-NEXT: 2 13.3 0.1 0.0 <total>
+
+# CHECK: [24] Code Region - Z cmla.d
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1803
+# CHECK-NEXT: Total uOps: 500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.28
+# CHECK-NEXT: IPC: 0.22
+# CHECK-NEXT: Block RThroughput: 8.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345678
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeER . . . . . . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D=====eeeeeER . . . . . . cmla z0.d, z1.d, z2.d, #90
+# CHECK-NEXT: [0,2] D========eeeeeER . . . . . cmla z0.d, z1.d, z2.d, #90
+# CHECK-NEXT: [0,3] D=============eeeeeER . . . . cmla z0.d, z0.d, z1.d, #90
+# CHECK-NEXT: [1,0] D==================eeeeeER . . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D=======================eeeeeER . . cmla z0.d, z1.d, z2.d, #90
+# CHECK-NEXT: [1,2] D==========================eeeeeER . . cmla z0.d, z1.d, z2.d, #90
+# CHECK-NEXT: [1,3] D===============================eeeeeER cmla z0.d, z0.d, z1.d, #90
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 10.0 0.5 0.0 mul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 15.0 0.0 0.0 cmla z0.d, z1.d, z2.d, #90
+# CHECK-NEXT: 2. 2 18.0 0.0 0.0 cmla z0.d, z1.d, z2.d, #90
+# CHECK-NEXT: 3. 2 23.0 0.0 0.0 cmla z0.d, z0.d, z1.d, #90
+# CHECK-NEXT: 2 16.5 0.1 0.0 <total>
+
+# CHECK: [25] Code Region - Z sdot.s
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1203
+# CHECK-NEXT: Total uOps: 500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.42
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456
+
+# CHECK: [0,0] DeeeeeER . . . .. mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D=====eeeER . . .. sdot z0.s, z1.b, z2.b
+# CHECK-NEXT: [0,2] D======eeeER . . .. sdot z0.s, z1.b, z2.b
+# CHECK-NEXT: [0,3] D=========eeeER. . .. sdot z0.s, z0.b, z1.b
+# CHECK-NEXT: [1,0] D============eeeeeER. .. mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D=================eeeER .. sdot z0.s, z1.b, z2.b
+# CHECK-NEXT: [1,2] D==================eeeER .. sdot z0.s, z1.b, z2.b
+# CHECK-NEXT: [1,3] D=====================eeeER sdot z0.s, z0.b, z1.b
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 7.0 0.5 0.0 mul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 12.0 0.0 0.0 sdot z0.s, z1.b, z2.b
+# CHECK-NEXT: 2. 2 13.0 0.0 0.0 sdot z0.s, z1.b, z2.b
+# CHECK-NEXT: 3. 2 16.0 0.0 0.0 sdot z0.s, z0.b, z1.b
+# CHECK-NEXT: 2 12.0 0.1 0.0 <total>
+
+# CHECK: [26] Code Region - Z sudot
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1203
+# CHECK-NEXT: Total uOps: 500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.42
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456
+
+# CHECK: [0,0] DeeeeeER . . . .. mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D=====eeeER . . .. sdot z0.s, z1.b, z2.b[1]
+# CHECK-NEXT: [0,2] D======eeeER . . .. sdot z0.s, z1.b, z2.b[1]
+# CHECK-NEXT: [0,3] D=========eeeER. . .. sdot z0.s, z0.b, z1.b[1]
+# CHECK-NEXT: [1,0] D============eeeeeER. .. mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D=================eeeER .. sdot z0.s, z1.b, z2.b[1]
+# CHECK-NEXT: [1,2] D==================eeeER .. sdot z0.s, z1.b, z2.b[1]
+# CHECK-NEXT: [1,3] D=====================eeeER sdot z0.s, z0.b, z1.b[1]
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 7.0 0.5 0.0 mul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 12.0 0.0 0.0 sdot z0.s, z1.b, z2.b[1]
+# CHECK-NEXT: 2. 2 13.0 0.0 0.0 sdot z0.s, z1.b, z2.b[1]
+# CHECK-NEXT: 3. 2 16.0 0.0 0.0 sdot z0.s, z0.b, z1.b[1]
+# CHECK-NEXT: 2 12.0 0.1 0.0 <total>
+
+# CHECK: [27] Code Region - Z sdot.d
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1203
+# CHECK-NEXT: Total uOps: 500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.42
+# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456
+
+# CHECK: [0,0] DeeeeeER . . . .. mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D=====eeeER . . .. sdot z0.d, z1.h, z2.h
+# CHECK-NEXT: [0,2] D======eeeER . . .. sdot z0.d, z1.h, z2.h
+# CHECK-NEXT: [0,3] D=========eeeER. . .. sdot z0.d, z0.h, z1.h
+# CHECK-NEXT: [1,0] D============eeeeeER. .. mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D=================eeeER .. sdot z0.d, z1.h, z2.h
+# CHECK-NEXT: [1,2] D==================eeeER .. sdot z0.d, z1.h, z2.h
+# CHECK-NEXT: [1,3] D=====================eeeER sdot z0.d, z0.h, z1.h
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 7.0 0.5 0.0 mul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 12.0 0.0 0.0 sdot z0.d, z1.h, z2.h
+# CHECK-NEXT: 2. 2 13.0 0.0 0.0 sdot z0.d, z1.h, z2.h
+# CHECK-NEXT: 3. 2 16.0 0.0 0.0 sdot z0.d, z0.h, z1.h
+# CHECK-NEXT: 2 12.0 0.1 0.0 <total>
+
+# CHECK: [28] Code Region - Z smmla
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1103
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.36
+# CHECK-NEXT: IPC: 0.36
+# CHECK-NEXT: Block RThroughput: 1.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01234
+
+# CHECK: [0,0] DeeeeER . . . . mul z0.s, z0.s, z0.s
+# CHECK-NEXT: [0,1] D====eeeER. . . . smmla z0.s, z1.b, z2.b
+# CHECK-NEXT: [0,2] D=====eeeER . . . smmla z0.s, z1.b, z2.b
+# CHECK-NEXT: [0,3] D========eeeER . . . smmla z0.s, z0.b, z1.b
+# CHECK-NEXT: [1,0] D===========eeeeER . . mul z0.s, z0.s, z0.s
+# CHECK-NEXT: [1,1] D===============eeeER . smmla z0.s, z1.b, z2.b
+# CHECK-NEXT: [1,2] D================eeeER . smmla z0.s, z1.b, z2.b
+# CHECK-NEXT: [1,3] D===================eeeER smmla z0.s, z0.b, z1.b
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 6.5 0.5 0.0 mul z0.s, z0.s, z0.s
+# CHECK-NEXT: 1. 2 10.5 0.0 0.0 smmla z0.s, z1.b, z2.b
+# CHECK-NEXT: 2. 2 11.5 0.0 0.0 smmla z0.s, z1.b, z2.b
+# CHECK-NEXT: 3. 2 14.5 0.0 0.0 smmla z0.s, z0.b, z1.b
+# CHECK-NEXT: 2 10.8 0.1 0.0 <total>
+
+# CHECK: [29] Code Region - Z mla.b
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1403
+# CHECK-NEXT: Total uOps: 500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.36
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeER . . . . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D=====eeeeER . . . . mla z0.b, p0/m, z1.b, z2.b
+# CHECK-NEXT: [0,2] D======eeeeER . . . . mla z0.b, p0/m, z1.b, z2.b
+# CHECK-NEXT: [0,3] D==========eeeeER . . . mla z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: [1,0] D==============eeeeeER . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D===================eeeeER . mla z0.b, p0/m, z1.b, z2.b
+# CHECK-NEXT: [1,2] D====================eeeeER . mla z0.b, p0/m, z1.b, z2.b
+# CHECK-NEXT: [1,3] D========================eeeeER mla z0.b, p0/m, z0.b, z1.b
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 8.0 0.5 0.0 mul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 13.0 0.0 0.0 mla z0.b, p0/m, z1.b, z2.b
+# CHECK-NEXT: 2. 2 14.0 0.0 0.0 mla z0.b, p0/m, z1.b, z2.b
+# CHECK-NEXT: 3. 2 18.0 0.0 0.0 mla z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 2 13.3 0.1 0.0 <total>
+
+# CHECK: [30] Code Region - Z mla.d
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1803
+# CHECK-NEXT: Total uOps: 500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.28
+# CHECK-NEXT: IPC: 0.22
+# CHECK-NEXT: Block RThroughput: 8.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345678
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeER . . . . . . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D=====eeeeeER . . . . . . mla z0.d, p0/m, z1.d, z2.d
+# CHECK-NEXT: [0,2] D========eeeeeER . . . . . mla z0.d, p0/m, z1.d, z2.d
+# CHECK-NEXT: [0,3] D=============eeeeeER . . . . mla z0.d, p0/m, z0.d, z1.d
+# CHECK-NEXT: [1,0] D==================eeeeeER . . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D=======================eeeeeER . . mla z0.d, p0/m, z1.d, z2.d
+# CHECK-NEXT: [1,2] D==========================eeeeeER . . mla z0.d, p0/m, z1.d, z2.d
+# CHECK-NEXT: [1,3] D===============================eeeeeER mla z0.d, p0/m, z0.d, z1.d
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 10.0 0.5 0.0 mul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 15.0 0.0 0.0 mla z0.d, p0/m, z1.d, z2.d
+# CHECK-NEXT: 2. 2 18.0 0.0 0.0 mla z0.d, p0/m, z1.d, z2.d
+# CHECK-NEXT: 3. 2 23.0 0.0 0.0 mla z0.d, p0/m, z0.d, z1.d
+# CHECK-NEXT: 2 16.5 0.1 0.0 <total>
+
+# CHECK: [31] Code Region - Z smlalb
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1403
+# CHECK-NEXT: Total uOps: 500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.36
+# CHECK-NEXT: IPC: 0.29
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeER . . . . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D=====eeeeER . . . . smlalb z0.d, z1.s, z2.s
+# CHECK-NEXT: [0,2] D======eeeeER . . . . smlalb z0.d, z1.s, z2.s
+# CHECK-NEXT: [0,3] D==========eeeeER . . . smlalb z0.d, z0.s, z1.s
+# CHECK-NEXT: [1,0] D==============eeeeeER . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D===================eeeeER . smlalb z0.d, z1.s, z2.s
+# CHECK-NEXT: [1,2] D====================eeeeER . smlalb z0.d, z1.s, z2.s
+# CHECK-NEXT: [1,3] D========================eeeeER smlalb z0.d, z0.s, z1.s
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 8.0 0.5 0.0 mul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 13.0 0.0 0.0 smlalb z0.d, z1.s, z2.s
+# CHECK-NEXT: 2. 2 14.0 0.0 0.0 smlalb z0.d, z1.s, z2.s
+# CHECK-NEXT: 3. 2 18.0 0.0 0.0 smlalb z0.d, z0.s, z1.s
+# CHECK-NEXT: 2 13.3 0.1 0.0 <total>
+
+# CHECK: [32] Code Region - Z sqdmlalb
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1503
+# CHECK-NEXT: Total uOps: 500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.33
+# CHECK-NEXT: IPC: 0.27
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeER . . . . . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D=====eeeeER . . . . . sqdmlalb z0.d, z1.s, z2.s
+# CHECK-NEXT: [0,2] D=======eeeeER . . . . . sqdmlalb z0.d, z1.s, z2.s
+# CHECK-NEXT: [0,3] D===========eeeeER . . . . sqdmlalb z0.d, z0.s, z1.s
+# CHECK-NEXT: [1,0] D===============eeeeeER . . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D====================eeeeER . . sqdmlalb z0.d, z1.s, z2.s
+# CHECK-NEXT: [1,2] D======================eeeeER . . sqdmlalb z0.d, z1.s, z2.s
+# CHECK-NEXT: [1,3] D==========================eeeeER sqdmlalb z0.d, z0.s, z1.s
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 8.5 0.5 0.0 mul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 13.5 0.0 0.0 sqdmlalb z0.d, z1.s, z2.s
+# CHECK-NEXT: 2. 2 15.5 0.0 0.0 sqdmlalb z0.d, z1.s, z2.s
+# CHECK-NEXT: 3. 2 19.5 0.0 0.0 sqdmlalb z0.d, z0.s, z1.s
+# CHECK-NEXT: 2 14.3 0.1 0.0 <total>
+
+# CHECK: [33] Code Region - Z sqrdmlah.b
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1503
+# CHECK-NEXT: Total uOps: 500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.33
+# CHECK-NEXT: IPC: 0.27
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeER . . . . . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D=====eeeeER . . . . . sqrdmlah z0.b, z1.b, z2.b
+# CHECK-NEXT: [0,2] D=======eeeeER . . . . . sqrdmlah z0.b, z1.b, z2.b
+# CHECK-NEXT: [0,3] D===========eeeeER . . . . sqrdmlah z0.b, z0.b, z1.b
+# CHECK-NEXT: [1,0] D===============eeeeeER . . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D====================eeeeER . . sqrdmlah z0.b, z1.b, z2.b
+# CHECK-NEXT: [1,2] D======================eeeeER . . sqrdmlah z0.b, z1.b, z2.b
+# CHECK-NEXT: [1,3] D==========================eeeeER sqrdmlah z0.b, z0.b, z1.b
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 8.5 0.5 0.0 mul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 13.5 0.0 0.0 sqrdmlah z0.b, z1.b, z2.b
+# CHECK-NEXT: 2. 2 15.5 0.0 0.0 sqrdmlah z0.b, z1.b, z2.b
+# CHECK-NEXT: 3. 2 19.5 0.0 0.0 sqrdmlah z0.b, z0.b, z1.b
+# CHECK-NEXT: 2 14.3 0.1 0.0 <total>
+
+# CHECK: [34] Code Region - Z sqrdmlah.d
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1803
+# CHECK-NEXT: Total uOps: 500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.28
+# CHECK-NEXT: IPC: 0.22
+# CHECK-NEXT: Block RThroughput: 8.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012345678
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeER . . . . . . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D=====eeeeeER . . . . . . sqrdmlah z0.d, z1.d, z2.d
+# CHECK-NEXT: [0,2] D========eeeeeER . . . . . sqrdmlah z0.d, z1.d, z2.d
+# CHECK-NEXT: [0,3] D=============eeeeeER . . . . sqrdmlah z0.d, z0.d, z1.d
+# CHECK-NEXT: [1,0] D==================eeeeeER . . . mul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D=======================eeeeeER . . sqrdmlah z0.d, z1.d, z2.d
+# CHECK-NEXT: [1,2] D==========================eeeeeER . . sqrdmlah z0.d, z1.d, z2.d
+# CHECK-NEXT: [1,3] D===============================eeeeeER sqrdmlah z0.d, z0.d, z1.d
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 10.0 0.5 0.0 mul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 15.0 0.0 0.0 sqrdmlah z0.d, z1.d, z2.d
+# CHECK-NEXT: 2. 2 18.0 0.0 0.0 sqrdmlah z0.d, z1.d, z2.d
+# CHECK-NEXT: 3. 2 23.0 0.0 0.0 sqrdmlah z0.d, z0.d, z1.d
+# CHECK-NEXT: 2 16.5 0.1 0.0 <total>
+
+# CHECK: [35] Code Region - Z fcmla ZPmZZ
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1503
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.27
+# CHECK-NEXT: IPC: 0.27
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeER . . . . . . fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D===eeeeeER . . . . . fcmla z0.d, p0/m, z1.d, z2.d, #90
+# CHECK-NEXT: [0,2] D=====eeeeeER . . . . . fcmla z0.d, p0/m, z1.d, z2.d, #90
+# CHECK-NEXT: [0,3] D==========eeeeeER . . . . fcmla z0.d, p0/m, z0.d, z1.d, #90
+# CHECK-NEXT: [1,0] D===============eeeER . . . fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D==================eeeeeER . . fcmla z0.d, p0/m, z1.d, z2.d, #90
+# CHECK-NEXT: [1,2] D====================eeeeeER . . fcmla z0.d, p0/m, z1.d, z2.d, #90
+# CHECK-NEXT: [1,3] D=========================eeeeeER fcmla z0.d, p0/m, z0.d, z1.d, #90
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 8.5 0.5 0.0 fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 11.5 0.0 0.0 fcmla z0.d, p0/m, z1.d, z2.d, #90
+# CHECK-NEXT: 2. 2 13.5 0.0 0.0 fcmla z0.d, p0/m, z1.d, z2.d, #90
+# CHECK-NEXT: 3. 2 18.5 0.0 0.0 fcmla z0.d, p0/m, z0.d, z1.d, #90
+# CHECK-NEXT: 2 13.0 0.1 0.0 <total>
+
+# CHECK: [36] Code Region - Z fcmla ZZZI
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1503
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.27
+# CHECK-NEXT: IPC: 0.27
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeER . . . . . . fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D===eeeeeER . . . . . fcmla z0.s, z1.s, z2.s[1], #90
+# CHECK-NEXT: [0,2] D=====eeeeeER . . . . . fcmla z0.s, z1.s, z2.s[1], #90
+# CHECK-NEXT: [0,3] D==========eeeeeER . . . . fcmla z0.s, z0.s, z1.s[1], #90
+# CHECK-NEXT: [1,0] D===============eeeER . . . fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D==================eeeeeER . . fcmla z0.s, z1.s, z2.s[1], #90
+# CHECK-NEXT: [1,2] D====================eeeeeER . . fcmla z0.s, z1.s, z2.s[1], #90
+# CHECK-NEXT: [1,3] D=========================eeeeeER fcmla z0.s, z0.s, z1.s[1], #90
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 8.5 0.5 0.0 fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 11.5 0.0 0.0 fcmla z0.s, z1.s, z2.s[1], #90
+# CHECK-NEXT: 2. 2 13.5 0.0 0.0 fcmla z0.s, z1.s, z2.s[1], #90
+# CHECK-NEXT: 3. 2 18.5 0.0 0.0 fcmla z0.s, z0.s, z1.s[1], #90
+# CHECK-NEXT: 2 13.0 0.1 0.0 <total>
+
+# CHECK: [37] Code Region - Z fmla ZPmZZ
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1303
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.31
+# CHECK-NEXT: IPC: 0.31
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeeeER . . . . . fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D===eeeeER. . . . . fmla z0.d, p0/m, z1.d, z2.d
+# CHECK-NEXT: [0,2] D=====eeeeER . . . . fmla z0.d, p0/m, z1.d, z2.d
+# CHECK-NEXT: [0,3] D=========eeeeER . . . fmla z0.d, p0/m, z0.d, z1.d
+# CHECK-NEXT: [1,0] D=============eeeER . . . fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D================eeeeER . . fmla z0.d, p0/m, z1.d, z2.d
+# CHECK-NEXT: [1,2] D==================eeeeER. . fmla z0.d, p0/m, z1.d, z2.d
+# CHECK-NEXT: [1,3] D======================eeeeER fmla z0.d, p0/m, z0.d, z1.d
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 7.5 0.5 0.0 fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 10.5 0.0 0.0 fmla z0.d, p0/m, z1.d, z2.d
+# CHECK-NEXT: 2. 2 12.5 0.0 0.0 fmla z0.d, p0/m, z1.d, z2.d
+# CHECK-NEXT: 3. 2 16.5 0.0 0.0 fmla z0.d, p0/m, z0.d, z1.d
+# CHECK-NEXT: 2 11.8 0.1 0.0 <total>
+
+# CHECK: [38] Code Region - Z fmla ZZZI
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1303
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.31
+# CHECK-NEXT: IPC: 0.31
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeeeER . . . . . fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D===eeeeER. . . . . fmla z0.d, z1.d, z2.d[1]
+# CHECK-NEXT: [0,2] D=====eeeeER . . . . fmla z0.d, z1.d, z2.d[1]
+# CHECK-NEXT: [0,3] D=========eeeeER . . . fmla z0.d, z0.d, z1.d[1]
+# CHECK-NEXT: [1,0] D=============eeeER . . . fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D================eeeeER . . fmla z0.d, z1.d, z2.d[1]
+# CHECK-NEXT: [1,2] D==================eeeeER. . fmla z0.d, z1.d, z2.d[1]
+# CHECK-NEXT: [1,3] D======================eeeeER fmla z0.d, z0.d, z1.d[1]
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 7.5 0.5 0.0 fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 10.5 0.0 0.0 fmla z0.d, z1.d, z2.d[1]
+# CHECK-NEXT: 2. 2 12.5 0.0 0.0 fmla z0.d, z1.d, z2.d[1]
+# CHECK-NEXT: 3. 2 16.5 0.0 0.0 fmla z0.d, z0.d, z1.d[1]
+# CHECK-NEXT: 2 11.8 0.1 0.0 <total>
+
+# CHECK: [39] Code Region - Z fmlalb ZZZ
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1303
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.31
+# CHECK-NEXT: IPC: 0.31
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeeeER . . . . . fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D===eeeeER. . . . . fmlalb z0.s, z1.h, z2.h
+# CHECK-NEXT: [0,2] D=====eeeeER . . . . fmlalb z0.s, z1.h, z2.h
+# CHECK-NEXT: [0,3] D=========eeeeER . . . fmlalb z0.s, z0.h, z1.h
+# CHECK-NEXT: [1,0] D=============eeeER . . . fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D================eeeeER . . fmlalb z0.s, z1.h, z2.h
+# CHECK-NEXT: [1,2] D==================eeeeER. . fmlalb z0.s, z1.h, z2.h
+# CHECK-NEXT: [1,3] D======================eeeeER fmlalb z0.s, z0.h, z1.h
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 7.5 0.5 0.0 fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 10.5 0.0 0.0 fmlalb z0.s, z1.h, z2.h
+# CHECK-NEXT: 2. 2 12.5 0.0 0.0 fmlalb z0.s, z1.h, z2.h
+# CHECK-NEXT: 3. 2 16.5 0.0 0.0 fmlalb z0.s, z0.h, z1.h
+# CHECK-NEXT: 2 11.8 0.1 0.0 <total>
+
+# CHECK: [40] Code Region - Z bfdot
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1603
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.25
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 01234
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeER . . . . . . fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D===eeeeeER . . . . . bfdot z0.s, z1.h, z2.h
+# CHECK-NEXT: [0,2] D======eeeeeER . . . . . bfdot z0.s, z1.h, z2.h
+# CHECK-NEXT: [0,3] D===========eeeeeER . . . . bfdot z0.s, z0.h, z1.h
+# CHECK-NEXT: [1,0] D================eeeER . . . fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D===================eeeeeER . . bfdot z0.s, z1.h, z2.h
+# CHECK-NEXT: [1,2] D======================eeeeeER. . bfdot z0.s, z1.h, z2.h
+# CHECK-NEXT: [1,3] D===========================eeeeeER bfdot z0.s, z0.h, z1.h
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 9.0 0.5 0.0 fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 12.0 0.0 0.0 bfdot z0.s, z1.h, z2.h
+# CHECK-NEXT: 2. 2 15.0 0.0 0.0 bfdot z0.s, z1.h, z2.h
+# CHECK-NEXT: 3. 2 20.0 0.0 0.0 bfdot z0.s, z0.h, z1.h
+# CHECK-NEXT: 2 14.0 0.1 0.0 <total>
+
+# CHECK: [41] Code Region - Z bfmmla
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1903
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.21
+# CHECK-NEXT: IPC: 0.21
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 0
+
+# CHECK: [0,0] DeeeER . . . . . . . fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D===eeeeeeER . . . . . . bfmmla z0.s, z1.h, z2.h
+# CHECK-NEXT: [0,2] D=======eeeeeeER . . . . . bfmmla z0.s, z1.h, z2.h
+# CHECK-NEXT: [0,3] D=============eeeeeeER . . . . bfmmla z0.s, z0.h, z1.h
+# CHECK-NEXT: [1,0] D===================eeeER. . . . fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D======================eeeeeeER . . bfmmla z0.s, z1.h, z2.h
+# CHECK-NEXT: [1,2] D==========================eeeeeeER. . bfmmla z0.s, z1.h, z2.h
+# CHECK-NEXT: [1,3] D================================eeeeeeER bfmmla z0.s, z0.h, z1.h
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 10.5 0.5 0.0 fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 13.5 0.0 0.0 bfmmla z0.s, z1.h, z2.h
+# CHECK-NEXT: 2. 2 17.5 0.0 0.0 bfmmla z0.s, z1.h, z2.h
+# CHECK-NEXT: 3. 2 23.5 0.0 0.0 bfmmla z0.s, z0.h, z1.h
+# CHECK-NEXT: 2 16.3 0.1 0.0 <total>
+
+# CHECK: [42] Code Region - bfmlalb
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 400
+# CHECK-NEXT: Total Cycles: 1503
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.27
+# CHECK-NEXT: IPC: 0.27
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 012
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeER . . . . . . fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: [0,1] D===eeeeeER . . . . . bfmlalb z0.s, z1.h, z2.h
+# CHECK-NEXT: [0,2] D=====eeeeeER . . . . . bfmlalb z0.s, z1.h, z2.h
+# CHECK-NEXT: [0,3] D==========eeeeeER . . . . bfmlalb z0.s, z0.h, z1.h
+# CHECK-NEXT: [1,0] D===============eeeER . . . fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: [1,1] D==================eeeeeER . . bfmlalb z0.s, z1.h, z2.h
+# CHECK-NEXT: [1,2] D====================eeeeeER . . bfmlalb z0.s, z1.h, z2.h
+# CHECK-NEXT: [1,3] D=========================eeeeeER bfmlalb z0.s, z0.h, z1.h
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 2 8.5 0.5 0.0 fmul z0.d, z0.d, z0.d
+# CHECK-NEXT: 1. 2 11.5 0.0 0.0 bfmlalb z0.s, z1.h, z2.h
+# CHECK-NEXT: 2. 2 13.5 0.0 0.0 bfmlalb z0.s, z1.h, z2.h
+# CHECK-NEXT: 3. 2 18.5 0.0 0.0 bfmlalb z0.s, z0.h, z1.h
+# CHECK-NEXT: 2 13.0 0.1 0.0 <total>
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-neon-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-neon-instructions.s
new file mode 100644
index 0000000..533d613
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-neon-instructions.s
@@ -0,0 +1,3729 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=neoverse-v3ae -mattr=+aes,+sha3,+sm4 -instruction-tables < %s | FileCheck %s
+
+abs d29, d24
+abs v0.16b, v0.16b
+abs v0.2d, v0.2d
+abs v0.2s, v0.2s
+abs v0.4h, v0.4h
+abs v0.4s, v0.4s
+abs v0.8b, v0.8b
+abs v0.8h, v0.8h
+add d17, d31, d29
+add v0.8b, v0.8b, v0.8b
+addhn v0.2s, v0.2d, v0.2d
+addhn v0.4h, v0.4s, v0.4s
+addhn v0.8b, v0.8h, v0.8h
+addhn2 v0.16b, v0.8h, v0.8h
+addhn2 v0.4s, v0.2d, v0.2d
+addhn2 v0.8h, v0.4s, v0.4s
+addp v0.2d, v0.2d, v0.2d
+addp v0.8b, v0.8b, v0.8b
+addv s0, v0.4s
+addv h0, v0.4h
+addv h0, v0.8h
+addv b0, v0.8b
+addv b0, v0.16b
+aesd v0.16b, v0.16b
+aese v0.16b, v0.16b
+aesimc v0.16b, v0.16b
+aesmc v0.16b, v0.16b
+and v0.8b, v0.8b, v0.8b
+bfcvt h0, s0
+bfcvtn v0.4h, v0.4s
+bfcvtn2 v0.8h, v0.4s
+bfdot v0.2s, v0.4h, v0.4h
+bfdot v0.4s, v0.8h, v0.8h
+bfmlalb v0.4s, v0.8h, v0.8h
+bfmlalb v0.4s, v0.8h, v0.h[3]
+bfmlalt v0.4s, v0.8h, v0.8h
+bfmlalt v0.4s, v0.8h, v0.h[3]
+bfmmla v0.4s, v0.8h, v0.8h
+bic v0.4h, #15, lsl #8
+bic v0.8b, v0.8b, v0.8b
+bif v0.16b, v0.16b, v0.16b
+bit v0.16b, v0.16b, v0.16b
+bsl v0.8b, v0.8b, v0.8b
+cls v0.16b, v0.16b
+cls v0.2s, v0.2s
+cls v0.4h, v0.4h
+cls v0.4s, v0.4s
+cls v0.8b, v0.8b
+cls v0.8h, v0.8h
+clz v0.16b, v0.16b
+clz v0.2s, v0.2s
+clz v0.4h, v0.4h
+clz v0.4s, v0.4s
+clz v0.8b, v0.8b
+clz v0.8h, v0.8h
+cmeq d20, d21, 0
+cmeq d20, d21, d22
+cmeq v0.16b, v0.16b, 0
+cmeq v0.16b, v0.16b, v0.16b
+cmge d20, d21, 0
+cmge d20, d21, d22
+cmge v0.4h, v0.4h, v0.4h
+cmge v0.8b, v0.8b, 0
+cmgt d20, d21, 0
+cmgt d20, d21, d22
+cmgt v0.2s, v0.2s, 0
+cmgt v0.4s, v0.4s, v0.4s
+cmhi d20, d21, d22
+cmhi v0.8h, v0.8h, v0.8h
+cmhs d20, d21, d22
+cmhs v0.8b, v0.8b, v0.8b
+cmle d20, d21, 0
+cmle v0.2d, v0.2d, 0
+cmlt d20, d21, 0
+cmlt v0.8h, v0.8h, 0
+cmtst d20, d21, d22
+cmtst v0.2s, v0.2s, v0.2s
+cnt v0.16b, v0.16b
+cnt v0.8b, v0.8b
+dup v0.16b,w28
+dup v0.2d,x28
+dup v0.2s,w28
+dup v0.4h,w28
+dup v0.4s,w28
+dup v0.8b,w28
+dup v0.8h,w28
+dup b0, v0.b[1]
+dup d0, v0.d[1]
+dup h0, v0.h[1]
+dup s0, v0.s[1]
+dup v0.16b, v0.b[1]
+dup v0.2d, v0.d[1]
+dup v0.2s, v0.s[1]
+dup v0.4h, v0.h[1]
+dup v0.4s, v0.s[1]
+dup v0.8b, v0.b[1]
+dup v0.8h, v0.h[1]
+eor v0.16b, v0.16b, v0.16b
+ext v0.16b, v0.16b, v0.16b, #3
+ext v0.8b, v0.8b, v0.8b, #3
+fabd d29, d24, d20
+fabd s29, s24, s20
+fabd v0.4s, v0.4s, v0.4s
+fabs v0.2d, v0.2d
+fabs v0.2s, v0.2s
+fabs v0.4h, v0.4h
+fabs v0.4s, v0.4s
+fabs v0.8h, v0.8h
+facge d20, d21, d22
+facge s10, s11, s12
+facge v0.4s, v0.4s, v0.4s
+facgt d20, d21, d22
+facgt s10, s11, s12
+facgt v0.2d, v0.2d, v0.2d
+fadd v0.4s, v0.4s, v0.4s
+faddp v0.2s, v0.2s, v0.2s
+faddp v0.4s, v0.4s, v0.4s
+fcadd v0.2s, v0.2s, v0.2s, 90
+fcadd v0.4s, v0.4s, v0.4s, 270
+fcmeq d20, d21, #0.0
+fcmeq d20, d21, d22
+fcmeq s10, s11, #0.0
+fcmeq s10, s11, s12
+fcmeq v0.2s, v0.2s, #0.0
+fcmeq v0.2s, v0.2s, v0.2s
+fcmge d20, d21, #0.0
+fcmge d20, d21, d22
+fcmge s10, s11, #0.0
+fcmge s10, s11, s12
+fcmge v0.2d, v0.2d, #0.0
+fcmge v0.4s, v0.4s, v0.4s
+fcmgt d20, d21, #0.0
+fcmgt d20, d21, d22
+fcmgt s10, s11, #0.0
+fcmgt s10, s11, s12
+fcmgt v0.4s, v0.4s, #0.0
+fcmgt v0.4s, v0.4s, v0.4s
+fcmla v0.2s, v0.2s, v0.2s, #90
+fcmla v0.4s, v0.4s, v0.s[1], #0
+fcmle d20, d21, #0.0
+fcmle s10, s11, #0.0
+fcmle v0.2d, v0.2d, #0.0
+fcmlt d20, d21, #0.0
+fcmlt s10, s11, #0.0
+fcmlt v0.4s, v0.4s, #0.0
+fcvtas d21, d14
+fcvtas s12, s13
+fcvtas h12, h13
+fcvtas v0.2d, v0.2d
+fcvtas v0.2s, v0.2s
+fcvtas v0.4h, v0.4h
+fcvtas v0.4s, v0.4s
+fcvtas v0.8h, v0.8h
+fcvtau d21, d14
+fcvtau s12, s13
+fcvtau h12, h13
+fcvtau v0.2d, v0.2d
+fcvtau v0.2s, v0.2s
+fcvtau v0.4h, v0.4h
+fcvtau v0.4s, v0.4s
+fcvtau v0.8h, v0.8h
+fcvtl v0.2d, v0.2s
+fcvtl v0.4s, v0.4h
+fcvtl2 v0.2d, v0.4s
+fcvtl2 v0.4s, v0.8h
+fcvtms d21, d14
+fcvtms s22, s13
+fcvtms h22, h13
+fcvtms v0.2d, v0.2d
+fcvtms v0.2s, v0.2s
+fcvtms v0.4h, v0.4h
+fcvtms v0.4s, v0.4s
+fcvtms v0.8h, v0.8h
+fcvtmu d21, d14
+fcvtmu s12, s13
+fcvtmu h12, h13
+fcvtmu v0.2d, v0.2d
+fcvtmu v0.2s, v0.2s
+fcvtmu v0.4h, v0.4h
+fcvtmu v0.4s, v0.4s
+fcvtmu v0.8h, v0.8h
+fcvtn v0.2s, v0.2d
+fcvtn v0.4h, v0.4s
+fcvtn2 v0.4s, v0.2d
+fcvtn2 v0.8h, v0.4s
+fcvtns d21, d14
+fcvtns s22, s13
+fcvtns h22, h13
+fcvtns v0.2d, v0.2d
+fcvtns v0.2s, v0.2s
+fcvtns v0.4h, v0.4h
+fcvtns v0.4s, v0.4s
+fcvtns v0.8h, v0.8h
+fcvtnu d21, d14
+fcvtnu s12, s13
+fcvtnu h12, h13
+fcvtnu v0.2d, v0.2d
+fcvtnu v0.2s, v0.2s
+fcvtnu v0.4h, v0.4h
+fcvtnu v0.4s, v0.4s
+fcvtnu v0.8h, v0.8h
+fcvtps d21, d14
+fcvtps s22, s13
+fcvtps h22, h13
+fcvtps v0.2d, v0.2d
+fcvtps v0.2s, v0.2s
+fcvtps v0.4h, v0.4h
+fcvtps v0.4s, v0.4s
+fcvtps v0.8h, v0.8h
+fcvtpu d21, d14
+fcvtpu s12, s13
+fcvtpu h12, h13
+fcvtpu v0.2d, v0.2d
+fcvtpu v0.2s, v0.2s
+fcvtpu v0.4h, v0.4h
+fcvtpu v0.4s, v0.4s
+fcvtpu v0.8h, v0.8h
+fcvtxn s22, d13
+fcvtxn v0.2s, v0.2d
+fcvtxn2 v0.4s, v0.2d
+fcvtzs d21, d12, #1
+fcvtzs d21, d14
+fcvtzs s12, s13
+fcvtzs s21, s12, #1
+fcvtzs h21, h14
+fcvtzs h21, h12, #1
+fcvtzs v0.2d, v0.2d
+fcvtzs v0.2d, v0.2d, #3
+fcvtzs v0.2s, v0.2s
+fcvtzs v0.2s, v0.2s, #3
+fcvtzs v0.4h, v0.4h
+fcvtzs v0.4s, v0.4s
+fcvtzs v0.4s, v0.4s, #3
+fcvtzs v0.8h, v0.8h
+fcvtzu d21, d12, #1
+fcvtzu d21, d14
+fcvtzu s12, s13
+fcvtzu s21, s12, #1
+fcvtzu h12, h13
+fcvtzu h21, h12, #1
+fcvtzu v0.2d, v0.2d
+fcvtzu v0.2d, v0.2d, #3
+fcvtzu v0.2s, v0.2s
+fcvtzu v0.2s, v0.2s, #3
+fcvtzu v0.4h, v0.4h
+fcvtzu v0.4s, v0.4s
+fcvtzu v0.4s, v0.4s, #3
+fcvtzu v0.8h, v0.8h
+fdiv v0.2d, v0.2d, v0.2d
+fdiv v0.2s, v0.2s, v0.2s
+fdiv v0.4h, v0.4h, v0.4h
+fdiv v0.4s, v0.4s, v0.4s
+fdiv v0.8h, v0.8h, v0.8h
+fmax v0.2d, v0.2d, v0.2d
+fmax v0.2s, v0.2s, v0.2s
+fmax v0.4s, v0.4s, v0.4s
+fmaxnm v0.2d, v0.2d, v0.2d
+fmaxnm v0.2s, v0.2s, v0.2s
+fmaxnm v0.4s, v0.4s, v0.4s
+fmaxnmp v0.2d, v0.2d, v0.2d
+fmaxnmp v0.2s, v0.2s, v0.2s
+fmaxnmp v0.4s, v0.4s, v0.4s
+fmaxp v0.2d, v0.2d, v0.2d
+fmaxp v0.2s, v0.2s, v0.2s
+fmaxp v0.4s, v0.4s, v0.4s
+fmaxv h0, v0.4h
+fmaxv h0, v0.8h
+fmaxv s0, v0.4s
+fmin v0.2d, v0.2d, v0.2d
+fmin v0.2s, v0.2s, v0.2s
+fmin v0.4s, v0.4s, v0.4s
+fminnm v0.2d, v0.2d, v0.2d
+fminnm v0.2s, v0.2s, v0.2s
+fminnm v0.4s, v0.4s, v0.4s
+fminnmp v0.2d, v0.2d, v0.2d
+fminnmp v0.2s, v0.2s, v0.2s
+fminnmp v0.4s, v0.4s, v0.4s
+fminp v0.2d, v0.2d, v0.2d
+fminp v0.2s, v0.2s, v0.2s
+fminp v0.4s, v0.4s, v0.4s
+fmla d0, d1, v0.d[1]
+fmla s0, s1, v0.s[3]
+fmla v0.2s, v0.2s, v0.2s
+fmlal v0.2s, v0.2h, v0.h[1]
+fmlal v0.4s, v0.4h, v0.h[3]
+fmlal v0.2s, v0.2h, v0.2h
+fmlal v0.4s, v0.4h, v0.4h
+fmlal2 v0.2s, v0.2h, v0.h[1]
+fmlal2 v0.4s, v0.4h, v0.h[3]
+fmlal2 v0.2s, v0.2h, v0.2h
+fmlal2 v0.4s, v0.4h, v0.4h
+fmls d0, d4, v0.d[1]
+fmls s3, s5, v0.s[3]
+fmls v0.2s, v0.2s, v0.2s
+fmlsl v0.2s, v0.2h, v0.h[1]
+fmlsl v0.4s, v0.4h, v0.h[3]
+fmlsl v0.2s, v0.2h, v0.2h
+fmlsl v0.4s, v0.4h, v0.4h
+fmlsl2 v0.2s, v0.2h, v0.h[1]
+fmlsl2 v0.4s, v0.4h, v0.h[3]
+fmlsl2 v0.2s, v0.2h, v0.2h
+fmlsl2 v0.4s, v0.4h, v0.4h
+fmov v0.2d, #-1.25
+fmov v0.2s, #13.0
+fmov v0.4s, #1.0
+fmul d0, d1, v0.d[1]
+fmul s0, s1, v0.s[3]
+fmul v0.2s, v0.2s, v0.2s
+fmulx d0, d4, v0.d[1]
+fmulx d23, d11, d1
+fmulx s20, s22, s15
+fmulx s3, s5, v0.s[3]
+fmulx v0.2d, v0.2d, v0.2d
+fmulx v0.2s, v0.2s, v0.2s
+fmulx v0.4s, v0.4s, v0.4s
+fneg v0.2d, v0.2d
+fneg v0.2s, v0.2s
+fneg v0.4h, v0.4h
+fneg v0.4s, v0.4s
+fneg v0.8h, v0.8h
+frecpe d13, d13
+frecpe s19, s14
+frecpe v0.2d, v0.2d
+frecpe v0.2s, v0.2s
+frecpe v0.4h, v0.4h
+frecpe v0.4s, v0.4s
+frecpe v0.8h, v0.8h
+frecps v0.4s, v0.4s, v0.4s
+frecps d22, d30, d21
+frecps s21, s16, s13
+frecpx d16, d19
+frecpx s18, s10
+frint32x v0.2d, v0.2d
+frint32x v0.2s, v0.2s
+frint32x v0.4s, v0.4s
+frint32z v0.2d, v0.2d
+frint32z v0.2s, v0.2s
+frint32z v0.4s, v0.4s
+frint64x v0.2d, v0.2d
+frint64x v0.2s, v0.2s
+frint64x v0.4s, v0.4s
+frint64z v0.2d, v0.2d
+frint64z v0.2s, v0.2s
+frint64z v0.4s, v0.4s
+frinta v0.2d, v0.2d
+frinta v0.2s, v0.2s
+frinta v0.4h, v0.4h
+frinta v0.4s, v0.4s
+frinta v0.8h, v0.8h
+frinti v0.2d, v0.2d
+frinti v0.2s, v0.2s
+frinti v0.4h, v0.4h
+frinti v0.4s, v0.4s
+frinti v0.8h, v0.8h
+frintm v0.2d, v0.2d
+frintm v0.2s, v0.2s
+frintm v0.4h, v0.4h
+frintm v0.4s, v0.4s
+frintm v0.8h, v0.8h
+frintn v0.2d, v0.2d
+frintn v0.2s, v0.2s
+frintn v0.4h, v0.4h
+frintn v0.4s, v0.4s
+frintn v0.8h, v0.8h
+frintp v0.2d, v0.2d
+frintp v0.2s, v0.2s
+frintp v0.4h, v0.4h
+frintp v0.4s, v0.4s
+frintp v0.8h, v0.8h
+frintx v0.2d, v0.2d
+frintx v0.2s, v0.2s
+frintx v0.4h, v0.4h
+frintx v0.4s, v0.4s
+frintx v0.8h, v0.8h
+frintz v0.2d, v0.2d
+frintz v0.2s, v0.2s
+frintz v0.4h, v0.4h
+frintz v0.4s, v0.4s
+frintz v0.8h, v0.8h
+frsqrte d21, d12
+frsqrte s22, s13
+frsqrte v0.2d, v0.2d
+frsqrte v0.2s, v0.2s
+frsqrte v0.4h, v0.4h
+frsqrte v0.4s, v0.4s
+frsqrte v0.8h, v0.8h
+frsqrts d8, d22, d18
+frsqrts s21, s5, s12
+frsqrts v0.2d, v0.2d, v0.2d
+fsqrt v0.2d, v0.2d
+fsqrt v0.2s, v0.2s
+fsqrt v0.4h, v0.4h
+fsqrt v0.4s, v0.4s
+fsqrt v0.8h, v0.8h
+fsub v0.2s, v0.2s, v0.2s
+ld1 { v0.16b }, [x0]
+ld1 { v0.2d, v1.2d, v2.2d }, [x0], #48
+ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
+ld1 { v0.4s, v1.4s }, [sp], #32
+ld1 { v0.4s, v1.4s, v2.4s }, [sp]
+ld1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3
+ld1 { v0.8h }, [x15], x2
+ld1 { v0.8h, v1.8h }, [x15]
+ld1 { v0.b }[9], [x0]
+ld1 { v0.b }[9], [x0], #1
+ld1r { v0.16b }, [x0]
+ld1r { v0.16b }, [x0], #1
+ld1r { v0.8h }, [x15]
+ld1r { v0.8h }, [x15], #2
+ld2 { v0.16b, v1.16b }, [x0], x1
+ld2 { v0.8b, v1.8b }, [x0]
+ld2 { v0.h, v1.h }[7], [x15]
+ld2 { v0.h, v1.h }[7], [x15], #4
+ld2r { v0.2d, v1.2d }, [x0]
+ld2r { v0.2d, v1.2d }, [x0], #16
+ld2r { v0.4s, v1.4s }, [sp]
+ld2r { v0.4s, v1.4s }, [sp], #8
+ld3 { v0.4h, v1.4h, v2.4h }, [x15]
+ld3 { v0.8h, v1.8h, v2.8h }, [x15], x2
+ld3 { v0.s, v1.s, v2.s }[3], [sp]
+ld3 { v0.s, v1.s, v2.s }[3], [sp], x3
+ld3r { v0.4h, v1.4h, v2.4h }, [x15]
+ld3r { v0.4h, v1.4h, v2.4h }, [x15], #6
+ld3r { v0.8b, v1.8b, v2.8b }, [x0]
+ld3r { v0.8b, v1.8b, v2.8b }, [x0], #3
+ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
+ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64
+ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0]
+ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0], #32
+ld4 { v0.h, v1.h, v2.h, v3.h }[7], [x0], x0
+ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [sp]
+ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [sp], x7
+ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
+ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x30
+mla v0.8b, v0.8b, v0.8b
+mls v0.4h, v0.4h, v0.4h
+mov b0, v0.b[15]
+mov d6, v0.d[1]
+mov h2, v0.h[5]
+mov s17, v0.s[2]
+mov v2.b[0], v0.b[0]
+mov v2.h[1], v0.h[1]
+mov v2.s[2], v0.s[2]
+mov v2.d[1], v0.d[1]
+mov v0.b[0], w8
+mov v0.h[1], w8
+mov v0.s[2], w8
+mov v0.d[1], x8
+mov v0.16b, v0.16b
+mov v0.8b, v0.8b
+movi d15, #0xff00ff00ff00ff
+movi v0.16b, #31
+movi v0.2d, #0xff0000ff0000ffff
+movi v0.2s, #8, msl #8
+movi v0.4s, #255, lsl #24
+movi v0.8b, #255
+mul v0.8b, v0.8b, v0.8b
+mvni v0.2s, 0
+mvni v0.4s, #16, msl #16
+neg d29, d24
+neg v0.16b, v0.16b
+neg v0.2d, v0.2d
+neg v0.2s, v0.2s
+neg v0.4h, v0.4h
+neg v0.4s, v0.4s
+neg v0.8b, v0.8b
+neg v0.8h, v0.8h
+not v0.16b, v0.16b
+not v0.8b, v0.8b
+orn v0.16b, v0.16b, v0.16b
+orr v0.16b, v0.16b, v0.16b
+orr v0.8h, #31
+pmul v0.16b, v0.16b, v0.16b
+pmul v0.8b, v0.8b, v0.8b
+pmull v0.8h, v0.8b, v0.8b
+pmull2 v0.8h, v0.16b, v0.16b
+raddhn v0.2s, v0.2d, v0.2d
+raddhn v0.4h, v0.4s, v0.4s
+raddhn v0.8b, v0.8h, v0.8h
+raddhn2 v0.16b, v0.8h, v0.8h
+raddhn2 v0.4s, v0.2d, v0.2d
+raddhn2 v0.8h, v0.4s, v0.4s
+rbit v0.16b, v0.16b
+rbit v0.8b, v0.8b
+rev16 v21.8b, v1.8b
+rev16 v30.16b, v31.16b
+rev32 v0.4h, v9.4h
+rev32 v21.8b, v1.8b
+rev32 v30.16b, v31.16b
+rev32 v4.8h, v7.8h
+rev64 v0.16b, v31.16b
+rev64 v1.8b, v9.8b
+rev64 v13.4h, v21.4h
+rev64 v2.8h, v4.8h
+rev64 v4.2s, v0.2s
+rev64 v6.4s, v8.4s
+rshrn v0.2s, v0.2d, #3
+rshrn v0.4h, v0.4s, #3
+rshrn v0.8b, v0.8h, #3
+rshrn2 v0.16b, v0.8h, #3
+rshrn2 v0.4s, v0.2d, #3
+rshrn2 v0.8h, v0.4s, #3
+rsubhn v0.2s, v0.2d, v0.2d
+rsubhn v0.4h, v0.4s, v0.4s
+rsubhn v0.8b, v0.8h, v0.8h
+rsubhn2 v0.16b, v0.8h, v0.8h
+rsubhn2 v0.4s, v0.2d, v0.2d
+rsubhn2 v0.8h, v0.4s, v0.4s
+saba v0.16b, v0.16b, v0.16b
+sabal v0.2d, v0.2s, v0.2s
+sabal v0.4s, v0.4h, v0.4h
+sabal v0.8h, v0.8b, v0.8b
+sabal2 v0.2d, v0.4s, v0.4s
+sabal2 v0.4s, v0.8h, v0.8h
+sabal2 v0.8h, v0.16b, v0.16b
+sabd v0.4h, v0.4h, v0.4h
+sabdl v0.2d, v0.2s, v0.2s
+sabdl v0.4s, v0.4h, v0.4h
+sabdl v0.8h, v0.8b, v0.8b
+sabdl2 v0.2d, v0.4s, v0.4s
+sabdl2 v0.4s, v0.8h, v0.8h
+sabdl2 v0.8h, v0.16b, v0.16b
+sadalp v0.1d, v0.2s
+sadalp v0.2d, v0.4s
+sadalp v0.2s, v0.4h
+sadalp v0.4h, v0.8b
+sadalp v0.4s, v0.8h
+sadalp v0.8h, v0.16b
+saddl v0.2d, v0.2s, v0.2s
+saddl v0.4s, v0.4h, v0.4h
+saddl v0.8h, v0.8b, v0.8b
+saddl2 v0.2d, v0.4s, v0.4s
+saddl2 v0.4s, v0.8h, v0.8h
+saddl2 v0.8h, v0.16b, v0.16b
+saddlp v0.1d, v0.2s
+saddlp v0.2d, v0.4s
+saddlp v0.2s, v0.4h
+saddlp v0.4h, v0.8b
+saddlp v0.4s, v0.8h
+saddlp v0.8h, v0.16b
+saddlv d0, v0.4s
+saddlv s0, v0.4h
+saddlv s0, v0.8h
+saddlv h0, v0.8b
+saddlv h0, v0.16b
+saddw v0.2d, v0.2d, v0.2s
+saddw v0.4s, v0.4s, v0.4h
+saddw v0.8h, v0.8h, v0.8b
+saddw2 v0.2d, v0.2d, v0.4s
+saddw2 v0.4s, v0.4s, v0.8h
+saddw2 v0.8h, v0.8h, v0.16b
+scvtf d21, d12
+scvtf d21, d12, #64
+scvtf s22, s13
+scvtf s22, s13, #32
+scvtf v0.2d, v0.2d
+scvtf v0.2d, v0.2d, #3
+scvtf v0.2s, v0.2s
+scvtf v0.2s, v0.2s, #3
+scvtf v0.4h, v0.4h
+scvtf v0.4s, v0.4s
+scvtf v0.4s, v0.4s, #3
+scvtf v0.8h, v0.8h
+sdot v0.2s, v0.8b, v0.4b[2]
+sdot v0.2s, v0.8b, v0.8b
+sdot v0.4s, v0.16b, v0.16b
+sdot v0.4s, v0.16b, v0.4b[2]
+shadd v0.8b, v0.8b, v0.8b
+shl d7, d10, #12
+shl v0.16b, v0.16b, #3
+shl v0.2d, v0.2d, #3
+shl v0.4h, v0.4h, #3
+shl v0.4s, v0.4s, #3
+shll v0.2d, v0.2s, #32
+shll v0.4s, v0.4h, #16
+shll v0.8h, v0.8b, #8
+shll v0.2d, v0.2s, #32
+shll v0.4s, v0.4h, #16
+shll v0.8h, v0.8b, #8
+shll2 v0.2d, v0.4s, #32
+shll2 v0.4s, v0.8h, #16
+shll2 v0.8h, v0.16b, #8
+shll2 v0.2d, v0.4s, #32
+shll2 v0.4s, v0.8h, #16
+shll2 v0.8h, v0.16b, #8
+shrn v0.2s, v0.2d, #3
+shrn v0.4h, v0.4s, #3
+shrn v0.8b, v0.8h, #3
+shrn2 v0.16b, v0.8h, #3
+shrn2 v0.4s, v0.2d, #3
+shrn2 v0.8h, v0.4s, #3
+shsub v0.2s, v0.2s, v0.2s
+shsub v0.4h, v0.4h, v0.4h
+sli d10, d14, #12
+sli v0.16b, v0.16b, #3
+sli v0.2d, v0.2d, #3
+sli v0.2s, v0.2s, #3
+sli v0.4h, v0.4h, #3
+sli v0.4s, v0.4s, #3
+sli v0.8b, v0.8b, #3
+sli v0.8h, v0.8h, #3
+smax v0.2s, v0.2s, v0.2s
+smax v0.4h, v0.4h, v0.4h
+smax v0.8b, v0.8b, v0.8b
+smaxp v0.2s, v0.2s, v0.2s
+smaxp v0.4h, v0.4h, v0.4h
+smaxp v0.8b, v0.8b, v0.8b
+smaxv b0, v0.8b
+smaxv b0, v0.16b
+smaxv h0, v0.4h
+smaxv h0, v0.8h
+smaxv s0, v0.4s
+smin v0.16b, v0.16b, v0.16b
+smin v0.4s, v0.4s, v0.4s
+smin v0.8h, v0.8h, v0.8h
+sminp v0.16b, v0.16b, v0.16b
+sminp v0.4s, v0.4s, v0.4s
+sminp v0.8h, v0.8h, v0.8h
+sminv b0, v0.8b
+sminv b0, v0.16b
+sminv h0, v0.4h
+sminv h0, v0.8h
+sminv s0, v0.4s
+smlal v0.2d, v0.2s, v0.2s
+smlal v0.4s, v0.4h, v0.4h
+smlal v0.8h, v0.8b, v0.8b
+smlal2 v0.2d, v0.4s, v0.4s
+smlal2 v0.4s, v0.8h, v0.8h
+smlal2 v0.8h, v0.16b, v0.16b
+smlsl v0.2d, v0.2s, v0.2s
+smlsl v0.4s, v0.4h, v0.4h
+smlsl v0.8h, v0.8b, v0.8b
+smlsl2 v0.2d, v0.4s, v0.4s
+smlsl2 v0.4s, v0.8h, v0.8h
+smlsl2 v0.8h, v0.16b, v0.16b
+smmla v0.4s, v0.16b, v0.16b
+smull v0.2d, v0.2s, v0.2s
+smull v0.4s, v0.4h, v0.4h
+smull v0.8h, v0.8b, v0.8b
+smull2 v0.2d, v0.4s, v0.4s
+smull2 v0.4s, v0.8h, v0.8h
+smull2 v0.8h, v0.16b, v0.16b
+sqabs b19, b14
+sqabs d18, d12
+sqabs h21, h15
+sqabs s20, s12
+sqabs v0.16b, v0.16b
+sqabs v0.2d, v0.2d
+sqabs v0.2s, v0.2s
+sqabs v0.4h, v0.4h
+sqabs v0.4s, v0.4s
+sqabs v0.8b, v0.8b
+sqabs v0.8h, v0.8h
+sqadd b20, b11, b15
+sqadd v0.16b, v0.16b, v0.16b
+sqadd v0.2s, v0.2s, v0.2s
+sqdmlal d19, s24, s12
+sqdmlal d8, s9, v0.s[1]
+sqdmlal s0, h0, v0.h[3]
+sqdmlal s17, h27, h12
+sqdmlal v0.2d, v0.2s, v0.2s
+sqdmlal v0.4s, v0.4h, v0.4h
+sqdmlal2 v0.2d, v0.4s, v0.4s
+sqdmlal2 v0.4s, v0.8h, v0.8h
+sqdmlsl d12, s23, s13
+sqdmlsl d8, s9, v0.s[1]
+sqdmlsl s0, h0, v0.h[3]
+sqdmlsl s14, h12, h25
+sqdmlsl v0.2d, v0.2s, v0.2s
+sqdmlsl v0.4s, v0.4h, v0.4h
+sqdmlsl2 v0.2d, v0.4s, v0.4s
+sqdmlsl2 v0.4s, v0.8h, v0.8h
+sqdmulh h10, h11, h12
+sqdmulh h7, h15, v0.h[3]
+sqdmulh s15, s14, v0.s[1]
+sqdmulh s20, s21, s2
+sqdmulh v0.2s, v0.2s, v0.2s
+sqdmulh v0.4s, v0.4s, v0.4s
+sqdmull d1, s1, v0.s[1]
+sqdmull d15, s22, s12
+sqdmull s1, h1, v0.h[3]
+sqdmull s12, h22, h12
+sqdmull v0.2d, v0.2s, v0.2s
+sqdmull v0.4s, v0.4h, v0.4h
+sqdmull2 v0.2d, v0.4s, v0.4s
+sqdmull2 v0.4s, v0.8h, v0.8h
+sqneg b19, b14
+sqneg d18, d12
+sqneg h21, h15
+sqneg s20, s12
+sqneg v0.16b, v0.16b
+sqneg v0.2d, v0.2d
+sqneg v0.2s, v0.2s
+sqneg v0.4h, v0.4h
+sqneg v0.4s, v0.4s
+sqneg v0.8b, v0.8b
+sqneg v0.8h, v0.8h
+sqrdmlah h0, h1, v2.h[3]
+sqrdmlah v0.4h, v1.4h, v2.h[3]
+sqrdmlah v0.8h, v1.8h, v2.h[3]
+sqrdmlah s0, s1, v2.s[1]
+sqrdmlah v0.2s, v1.2s, v2.s[1]
+sqrdmlah v0.4s, v1.4s, v2.s[1]
+sqrdmlah h0, h1, h2
+sqrdmlah v0.4h, v1.4h, v2.4h
+sqrdmlah v0.8h, v1.8h, v2.8h
+sqrdmlah s0, s1, s2
+sqrdmlah v0.2s, v1.2s, v2.2s
+sqrdmlah v0.4s, v1.4s, v2.4s
+sqrdmlsh h0, h1, v2.h[3]
+sqrdmlsh v0.4h, v1.4h, v2.h[3]
+sqrdmlsh v0.8h, v1.8h, v2.h[3]
+sqrdmlsh s0, s1, v2.s[1]
+sqrdmlsh v0.2s, v1.2s, v2.s[1]
+sqrdmlsh v0.4s, v1.4s, v2.s[1]
+sqrdmlsh h0, h1, h2
+sqrdmlsh v0.4h, v1.4h, v2.4h
+sqrdmlsh v0.8h, v1.8h, v2.8h
+sqrdmlsh s0, s1, s2
+sqrdmlsh v0.2s, v1.2s, v2.2s
+sqrdmlsh v0.4s, v1.4s, v2.4s
+sqrdmulh h10, h11, h12
+sqrdmulh h7, h15, v0.h[3]
+sqrdmulh s15, s14, v0.s[1]
+sqrdmulh s20, s21, s2
+sqrdmulh v0.4h, v0.4h, v0.4h
+sqrdmulh v0.8h, v0.8h, v0.8h
+sqrshl d31, d31, d31
+sqrshl h3, h4, h15
+sqrshl v0.2s, v0.2s, v0.2s
+sqrshl v0.4h, v0.4h, v0.4h
+sqrshl v0.8b, v0.8b, v0.8b
+sqrshrn b10, h13, #2
+sqrshrn h15, s10, #6
+sqrshrn s15, d12, #9
+sqrshrn v0.2s, v0.2d, #3
+sqrshrn v0.4h, v0.4s, #3
+sqrshrn v0.8b, v0.8h, #3
+sqrshrn2 v0.16b, v0.8h, #3
+sqrshrn2 v0.4s, v0.2d, #3
+sqrshrn2 v0.8h, v0.4s, #3
+sqrshrun b17, h10, #6
+sqrshrun h10, s13, #15
+sqrshrun s22, d16, #31
+sqrshrun v0.2s, v0.2d, #3
+sqrshrun v0.4h, v0.4s, #3
+sqrshrun v0.8b, v0.8h, #3
+sqrshrun2 v0.16b, v0.8h, #3
+sqrshrun2 v0.4s, v0.2d, #3
+sqrshrun2 v0.8h, v0.4s, #3
+sqshl b11, b19, #7
+sqshl d15, d16, #51
+sqshl d31, d31, d31
+sqshl h13, h18, #11
+sqshl h3, h4, h15
+sqshl s14, s17, #22
+sqshl v0.16b, v0.16b, #3
+sqshl v0.2d, v0.2d, #3
+sqshl v0.2s, v0.2s, #3
+sqshl v0.2s, v0.2s, v0.2s
+sqshl v0.4h, v0.4h, #3
+sqshl v0.4h, v0.4h, v0.4h
+sqshl v0.4s, v0.4s, #3
+sqshl v0.8b, v0.8b, #3
+sqshl v0.8b, v0.8b, v0.8b
+sqshl v0.8h, v0.8h, #3
+sqshlu b15, b18, #6
+sqshlu d11, d13, #32
+sqshlu h19, h17, #6
+sqshlu s16, s14, #25
+sqshlu v0.16b, v0.16b, #3
+sqshlu v0.2d, v0.2d, #3
+sqshlu v0.2s, v0.2s, #3
+sqshlu v0.4h, v0.4h, #3
+sqshlu v0.4s, v0.4s, #3
+sqshlu v0.8b, v0.8b, #3
+sqshlu v0.8h, v0.8h, #3
+sqshrn b10, h15, #5
+sqshrn h17, s10, #4
+sqshrn s18, d10, #31
+sqshrn v0.2s, v0.2d, #3
+sqshrn v0.4h, v0.4s, #3
+sqshrn v0.8b, v0.8h, #3
+sqshrn2 v0.16b, v0.8h, #3
+sqshrn2 v0.4s, v0.2d, #3
+sqshrn2 v0.8h, v0.4s, #3
+sqshrun b15, h10, #7
+sqshrun h20, s14, #3
+sqshrun s10, d15, #15
+sqshrun v0.2s, v0.2d, #3
+sqshrun v0.4h, v0.4s, #3
+sqshrun v0.8b, v0.8h, #3
+sqshrun2 v0.16b, v0.8h, #3
+sqshrun2 v0.4s, v0.2d, #3
+sqshrun2 v0.8h, v0.4s, #3
+sqsub s20, s10, s7
+sqsub v0.2d, v0.2d, v0.2d
+sqsub v0.4s, v0.4s, v0.4s
+sqsub v0.8b, v0.8b, v0.8b
+sqxtn b18, h18
+sqxtn h20, s17
+sqxtn s19, d14
+sqxtn v0.2s, v0.2d
+sqxtn v0.4h, v0.4s
+sqxtn v0.8b, v0.8h
+sqxtn2 v0.16b, v0.8h
+sqxtn2 v0.4s, v0.2d
+sqxtn2 v0.8h, v0.4s
+sqxtun b19, h14
+sqxtun h21, s15
+sqxtun s20, d12
+sqxtun v0.2s, v0.2d
+sqxtun v0.4h, v0.4s
+sqxtun v0.8b, v0.8h
+sqxtun2 v0.16b, v0.8h
+sqxtun2 v0.4s, v0.2d
+sqxtun2 v0.8h, v0.4s
+srhadd v0.2s, v0.2s, v0.2s
+srhadd v0.4h, v0.4h, v0.4h
+srhadd v0.8b, v0.8b, v0.8b
+sri d10, d12, #14
+sri v0.16b, v0.16b, #3
+sri v0.2d, v0.2d, #3
+sri v0.2s, v0.2s, #3
+sri v0.4h, v0.4h, #3
+sri v0.4s, v0.4s, #3
+sri v0.8b, v0.8b, #3
+sri v0.8h, v0.8h, #3
+srshl d16, d16, d16
+srshl v0.2s, v0.2s, v0.2s
+srshl v0.4h, v0.4h, v0.4h
+srshl v0.8b, v0.8b, v0.8b
+srshr d19, d18, #7
+srshr v0.16b, v0.16b, #3
+srshr v0.2d, v0.2d, #3
+srshr v0.2s, v0.2s, #3
+srshr v0.4h, v0.4h, #3
+srshr v0.4s, v0.4s, #3
+srshr v0.8b, v0.8b, #3
+srshr v0.8h, v0.8h, #3
+srsra d15, d11, #19
+srsra v0.16b, v0.16b, #3
+srsra v0.2d, v0.2d, #3
+srsra v0.2s, v0.2s, #3
+srsra v0.4h, v0.4h, #3
+srsra v0.4s, v0.4s, #3
+srsra v0.8b, v0.8b, #3
+srsra v0.8h, v0.8h, #3
+sshl d31, d31, d31
+sshl v0.2d, v0.2d, v0.2d
+sshl v0.2s, v0.2s, v0.2s
+sshl v0.4h, v0.4h, v0.4h
+sshl v0.8b, v0.8b, v0.8b
+sshll v0.2d, v0.2s, #3
+sshll2 v0.4s, v0.8h, #3
+sshr d15, d16, #12
+sshr v0.16b, v0.16b, #3
+sshr v0.2d, v0.2d, #3
+sshr v0.2s, v0.2s, #3
+sshr v0.4h, v0.4h, #3
+sshr v0.4s, v0.4s, #3
+sshr v0.8b, v0.8b, #3
+sshr v0.8h, v0.8h, #3
+ssra d18, d12, #21
+ssra v0.16b, v0.16b, #3
+ssra v0.2d, v0.2d, #3
+ssra v0.2s, v0.2s, #3
+ssra v0.4h, v0.4h, #3
+ssra v0.4s, v0.4s, #3
+ssra v0.8b, v0.8b, #3
+ssra v0.8h, v0.8h, #3
+ssubl v0.2d, v0.2s, v0.2s
+ssubl v0.4s, v0.4h, v0.4h
+ssubl v0.8h, v0.8b, v0.8b
+ssubl2 v0.2d, v0.4s, v0.4s
+ssubl2 v0.4s, v0.8h, v0.8h
+ssubl2 v0.8h, v0.16b, v0.16b
+ssubw v0.2d, v0.2d, v0.2s
+ssubw v0.4s, v0.4s, v0.4h
+ssubw v0.8h, v0.8h, v0.8b
+ssubw2 v0.2d, v0.2d, v0.4s
+ssubw2 v0.4s, v0.4s, v0.8h
+ssubw2 v0.8h, v0.8h, v0.16b
+st1 { v0.16b }, [x0]
+st1 { v0.2d, v1.2d, v2.2d }, [x0], #48
+st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
+st1 { v0.4s, v1.4s }, [sp], #32
+st1 { v0.4s, v1.4s, v2.4s }, [sp]
+st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3
+st1 { v0.8h }, [x15], x2
+st1 { v0.8h, v1.8h }, [x15]
+st1 { v0.d }[1], [x0]
+st1 { v0.d }[1], [x0], #8
+st2 { v0.16b, v1.16b }, [x0], x1
+st2 { v0.8b, v1.8b }, [x0]
+st2 { v0.s, v1.s }[3], [sp]
+st2 { v0.s, v1.s }[3], [sp], #8
+st3 { v0.4h, v1.4h, v2.4h }, [x15]
+st3 { v0.8h, v1.8h, v2.8h }, [x15], x2
+st3 { v0.h, v1.h, v2.h }[7], [x15]
+st3 { v0.h, v1.h, v2.h }[7], [x15], #6
+st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
+st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64
+st4 { v0.b, v1.b, v2.b, v3.b }[9], [x0]
+st4 { v0.d, v1.d, v2.d, v3.d }[1], [x0], x5
+sub d15, d5, d16
+sub v0.2d, v0.2d, v0.2d
+sudot v0.2s, v0.8b, v0.4b[2]
+sudot v0.4s, v0.16b, v0.4b[2]
+suqadd b19, b14
+suqadd d18, d22
+suqadd h20, h15
+suqadd s21, s12
+suqadd v0.16b, v0.16b
+suqadd v0.2d, v0.2d
+suqadd v0.2s, v0.2s
+suqadd v0.4h, v0.4h
+suqadd v0.4s, v0.4s
+suqadd v0.8b, v0.8b
+suqadd v0.8h, v0.8h
+tbl v0.16b, { v0.16b }, v0.16b
+tbl v0.16b, { v0.16b, v1.16b }, v0.16b
+tbl v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b
+tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b
+tbl v0.8b, { v0.16b }, v0.8b
+tbl v0.8b, { v0.16b, v1.16b }, v0.8b
+tbl v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b
+tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b
+tbx v0.16b, { v0.16b }, v0.16b
+tbx v0.16b, { v0.16b, v1.16b }, v0.16b
+tbx v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b
+tbx v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b
+tbx v0.8b, { v0.16b }, v0.8b
+tbx v0.8b, { v0.16b, v1.16b }, v0.8b
+tbx v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b
+tbx v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b
+trn1 v0.16b, v0.16b, v0.16b
+trn1 v0.2d, v0.2d, v0.2d
+trn1 v0.2s, v0.2s, v0.2s
+trn1 v0.4h, v0.4h, v0.4h
+trn1 v0.4s, v0.4s, v0.4s
+trn1 v0.8b, v0.8b, v0.8b
+trn1 v0.8h, v0.8h, v0.8h
+trn2 v0.16b, v0.16b, v0.16b
+trn2 v0.2d, v0.2d, v0.2d
+trn2 v0.2s, v0.2s, v0.2s
+trn2 v0.4h, v0.4h, v0.4h
+trn2 v0.4s, v0.4s, v0.4s
+trn2 v0.8b, v0.8b, v0.8b
+trn2 v0.8h, v0.8h, v0.8h
+uaba v0.8b, v0.8b, v0.8b
+uabal v0.2d, v0.2s, v0.2s
+uabal v0.4s, v0.4h, v0.4h
+uabal v0.8h, v0.8b, v0.8b
+uabal2 v0.2d, v0.4s, v0.4s
+uabal2 v0.4s, v0.8h, v0.8h
+uabal2 v0.8h, v0.16b, v0.16b
+uabd v0.4h, v0.4h, v0.4h
+uabdl v0.2d, v0.2s, v0.2s
+uabdl v0.4s, v0.4h, v0.4h
+uabdl v0.8h, v0.8b, v0.8b
+uabdl2 v0.2d, v0.4s, v0.4s
+uabdl2 v0.4s, v0.8h, v0.8h
+uabdl2 v0.8h, v0.16b, v0.16b
+uadalp v0.1d, v0.2s
+uadalp v0.2d, v0.4s
+uadalp v0.2s, v0.4h
+uadalp v0.4h, v0.8b
+uadalp v0.4s, v0.8h
+uadalp v0.8h, v0.16b
+uaddl v0.2d, v0.2s, v0.2s
+uaddl v0.4s, v0.4h, v0.4h
+uaddl v0.8h, v0.8b, v0.8b
+uaddl2 v0.2d, v0.4s, v0.4s
+uaddl2 v0.4s, v0.8h, v0.8h
+uaddl2 v0.8h, v0.16b, v0.16b
+uaddlp v0.1d, v0.2s
+uaddlp v0.2d, v0.4s
+uaddlp v0.2s, v0.4h
+uaddlp v0.4h, v0.8b
+uaddlp v0.4s, v0.8h
+uaddlp v0.8h, v0.16b
+uaddlv d0, v0.4s
+uaddlv s0, v0.4h
+uaddlv s0, v0.8h
+uaddlv h0, v0.8b
+uaddlv h0, v0.16b
+uaddw v0.2d, v0.2d, v0.2s
+uaddw v0.4s, v0.4s, v0.4h
+uaddw v0.8h, v0.8h, v0.8b
+uaddw2 v0.2d, v0.2d, v0.4s
+uaddw2 v0.4s, v0.4s, v0.8h
+uaddw2 v0.8h, v0.8h, v0.16b
+ucvtf d21, d14
+ucvtf d21, d14, #64
+ucvtf s22, s13
+ucvtf s22, s13, #32
+ucvtf v0.2d, v0.2d
+ucvtf v0.2d, v0.2d, #3
+ucvtf v0.2s, v0.2s
+ucvtf v0.2s, v0.2s, #3
+ucvtf v0.4h, v0.4h
+ucvtf v0.4s, v0.4s
+ucvtf v0.4s, v0.4s, #3
+ucvtf v0.8h, v0.8h
+udot v0.2s, v0.8b, v0.4b[2]
+udot v0.2s, v0.8b, v0.8b
+udot v0.4s, v0.16b, v0.16b
+udot v0.4s, v0.16b, v0.4b[2]
+uhadd v0.16b, v0.16b, v0.16b
+uhadd v0.8h, v0.8h, v0.8h
+uhsub v0.4s, v0.4s, v0.4s
+umax v0.16b, v0.16b, v0.16b
+umax v0.4s, v0.4s, v0.4s
+umax v0.8h, v0.8h, v0.8h
+umaxp v0.16b, v0.16b, v0.16b
+umaxp v0.4s, v0.4s, v0.4s
+umaxp v0.8h, v0.8h, v0.8h
+umaxv b0, v0.8b
+umaxv b0, v0.16b
+umaxv h0, v0.4h
+umaxv h0, v0.8h
+umaxv s0, v0.4s
+umin v0.2s, v0.2s, v0.2s
+umin v0.4h, v0.4h, v0.4h
+umin v0.8b, v0.8b, v0.8b
+uminp v0.2s, v0.2s, v0.2s
+uminp v0.4h, v0.4h, v0.4h
+uminp v0.8b, v0.8b, v0.8b
+uminv b0, v0.8b
+uminv b0, v0.16b
+uminv h0, v0.4h
+uminv h0, v0.8h
+uminv s0, v0.4s
+umlal v0.2d, v0.2s, v0.2s
+umlal v0.4s, v0.4h, v0.4h
+umlal v0.8h, v0.8b, v0.8b
+umlal2 v0.2d, v0.4s, v0.4s
+umlal2 v0.4s, v0.8h, v0.8h
+umlal2 v0.8h, v0.16b, v0.16b
+umlsl v0.2d, v0.2s, v0.2s
+umlsl v0.4s, v0.4h, v0.4h
+umlsl v0.8h, v0.8b, v0.8b
+umlsl2 v0.2d, v0.4s, v0.4s
+umlsl2 v0.4s, v0.8h, v0.8h
+umlsl2 v0.8h, v0.16b, v0.16b
+ummla v0.4s, v0.16b, v0.16b
+umov w0, v0.b[1]
+umov w0, v0.h[1]
+umov w0, v0.s[1]
+umov x0, v0.d[1]
+umull v0.2d, v0.2s, v0.2s
+umull v0.4s, v0.4h, v0.4h
+umull v0.8h, v0.8b, v0.8b
+umull2 v0.2d, v0.4s, v0.4s
+umull2 v0.4s, v0.8h, v0.8h
+umull2 v0.8h, v0.16b, v0.16b
+uqadd h0, h1, h5
+uqadd v0.8h, v0.8h, v0.8h
+uqrshl b11, b20, b30
+uqrshl s23, s20, s16
+uqrshl v0.16b, v0.16b, v0.16b
+uqrshl v0.4s, v0.4s, v0.4s
+uqrshl v0.4s, v0.4s, v0.4s
+uqrshl v0.8h, v0.8h, v0.8h
+uqrshrn b10, h12, #5
+uqrshrn h12, s10, #14
+uqrshrn s10, d10, #25
+uqrshrn v0.2s, v0.2d, #3
+uqrshrn v0.4h, v0.4s, #3
+uqrshrn v0.8b, v0.8h, #3
+uqrshrn2 v0.16b, v0.8h, #3
+uqrshrn2 v0.4s, v0.2d, #3
+uqrshrn2 v0.8h, v0.4s, #3
+uqshl b11, b20, b30
+uqshl b18, b15, #6
+uqshl d15, d12, #19
+uqshl h11, h18, #7
+uqshl s14, s19, #18
+uqshl s23, s20, s16
+uqshl v0.16b, v0.16b, #3
+uqshl v0.16b, v0.16b, v0.16b
+uqshl v0.2d, v0.2d, #3
+uqshl v0.2d, v0.2d, v0.2d
+uqshl v0.2s, v0.2s, #3
+uqshl v0.4h, v0.4h, #3
+uqshl v0.4s, v0.4s, #3
+uqshl v0.4s, v0.4s, v0.4s
+uqshl v0.8b, v0.8b, #3
+uqshl v0.8h, v0.8h, #3
+uqshl v0.8h, v0.8h, v0.8h
+uqshrn b12, h10, #7
+uqshrn h10, s14, #5
+uqshrn s10, d12, #13
+uqshrn v0.2s, v0.2d, #3
+uqshrn v0.4h, v0.4s, #3
+uqshrn v0.8b, v0.8h, #3
+uqshrn2 v0.16b, v0.8h, #3
+uqshrn2 v0.4s, v0.2d, #3
+uqshrn2 v0.8h, v0.4s, #3
+uqsub d16, d16, d16
+uqsub v0.4h, v0.4h, v0.4h
+uqxtn b18, h18
+uqxtn h20, s17
+uqxtn s19, d14
+uqxtn v0.2s, v0.2d
+uqxtn v0.4h, v0.4s
+uqxtn v0.8b, v0.8h
+uqxtn2 v0.16b, v0.8h
+uqxtn2 v0.4s, v0.2d
+uqxtn2 v0.8h, v0.4s
+urecpe v0.2s, v0.2s
+urecpe v0.4s, v0.4s
+urhadd v0.16b, v0.16b, v0.16b
+urhadd v0.4s, v0.4s, v0.4s
+urhadd v0.8h, v0.8h, v0.8h
+urshl d8, d7, d4
+urshl v0.16b, v0.16b, v0.16b
+urshl v0.2d, v0.2d, v0.2d
+urshl v0.4s, v0.4s, v0.4s
+urshl v0.8h, v0.8h, v0.8h
+urshr d20, d23, #31
+urshr v0.16b, v0.16b, #3
+urshr v0.2d, v0.2d, #3
+urshr v0.2s, v0.2s, #3
+urshr v0.4h, v0.4h, #3
+urshr v0.4s, v0.4s, #3
+urshr v0.8b, v0.8b, #3
+urshr v0.8h, v0.8h, #3
+ursqrte v0.2s, v0.2s
+ursqrte v0.4s, v0.4s
+ursra d18, d10, #13
+ursra v0.16b, v0.16b, #3
+ursra v0.2d, v0.2d, #3
+ursra v0.2s, v0.2s, #3
+ursra v0.4h, v0.4h, #3
+ursra v0.4s, v0.4s, #3
+ursra v0.8b, v0.8b, #3
+ursra v0.8h, v0.8h, #3
+usdot v0.2s, v0.8b, v0.4b[2]
+usdot v0.2s, v0.8b, v0.8b
+usdot v0.4s, v0.16b, v0.16b
+usdot v0.4s, v0.16b, v0.4b[2]
+ushl d0, d0, d0
+ushl v0.16b, v0.16b, v0.16b
+ushl v0.4s, v0.4s, v0.4s
+ushl v0.8h, v0.8h, v0.8h
+ushll v0.4s, v0.4h, #3
+ushll2 v0.8h, v0.16b, #3
+ushr d10, d17, #18
+ushr v0.16b, v0.16b, #3
+ushr v0.2d, v0.2d, #3
+ushr v0.2s, v0.2s, #3
+ushr v0.4h, v0.4h, #3
+ushr v0.4s, v0.4s, #3
+ushr v0.8b, v0.8b, #3
+ushr v0.8h, v0.8h, #3
+usmmla v0.4s, v0.16b, v0.16b
+smov w0, v0.b[1]
+smov w0, v0.h[1]
+smov x0, v0.b[1]
+smov x0, v0.h[1]
+smov x0, v0.s[1]
+usqadd b19, b14
+usqadd d18, d22
+usqadd h20, h15
+usqadd s21, s12
+usqadd v0.16b, v0.16b
+usqadd v0.2d, v0.2d
+usqadd v0.2s, v0.2s
+usqadd v0.4h, v0.4h
+usqadd v0.4s, v0.4s
+usqadd v0.8b, v0.8b
+usqadd v0.8h, v0.8h
+usra d20, d13, #61
+usra v0.16b, v0.16b, #3
+usra v0.2d, v0.2d, #3
+usra v0.2s, v0.2s, #3
+usra v0.4h, v0.4h, #3
+usra v0.4s, v0.4s, #3
+usra v0.8b, v0.8b, #3
+usra v0.8h, v0.8h, #3
+usubl v0.2d, v0.2s, v0.2s
+usubl v0.4s, v0.4h, v0.4h
+usubl v0.8h, v0.8b, v0.8b
+usubl2 v0.2d, v0.4s, v0.4s
+usubl2 v0.4s, v0.8h, v0.8h
+usubl2 v0.8h, v0.16b, v0.16b
+usubw v0.2d, v0.2d, v0.2s
+usubw v0.4s, v0.4s, v0.4h
+usubw v0.8h, v0.8h, v0.8b
+usubw2 v0.2d, v0.2d, v0.4s
+usubw2 v0.4s, v0.4s, v0.8h
+usubw2 v0.8h, v0.8h, v0.16b
+uzp1 v0.16b, v0.16b, v0.16b
+uzp1 v0.2d, v0.2d, v0.2d
+uzp1 v0.2s, v0.2s, v0.2s
+uzp1 v0.4h, v0.4h, v0.4h
+uzp1 v0.4s, v0.4s, v0.4s
+uzp1 v0.8b, v0.8b, v0.8b
+uzp1 v0.8h, v0.8h, v0.8h
+uzp2 v0.16b, v0.16b, v0.16b
+uzp2 v0.2d, v0.2d, v0.2d
+uzp2 v0.2s, v0.2s, v0.2s
+uzp2 v0.4h, v0.4h, v0.4h
+uzp2 v0.4s, v0.4s, v0.4s
+uzp2 v0.8b, v0.8b, v0.8b
+uzp2 v0.8h, v0.8h, v0.8h
+xtn v0.2s, v0.2d
+xtn v0.4h, v0.4s
+xtn v0.8b, v0.8h
+xtn2 v0.16b, v0.8h
+xtn2 v0.4s, v0.2d
+xtn2 v0.8h, v0.4s
+zip1 v0.16b, v0.16b, v0.16b
+zip1 v0.2d, v0.2d, v0.2d
+zip1 v0.2s, v0.2s, v0.2s
+zip1 v0.4h, v0.4h, v0.4h
+zip1 v0.4s, v0.4s, v0.4s
+zip1 v0.8b, v0.8b, v0.8b
+zip1 v0.8h, v0.8h, v0.8h
+zip2 v0.16b, v0.16b, v0.16b
+zip2 v0.2d, v0.2d, v0.2d
+zip2 v0.2s, v0.2s, v0.2s
+zip2 v0.4h, v0.4h, v0.4h
+zip2 v0.4s, v0.4s, v0.4s
+zip2 v0.8b, v0.8b, v0.8b
+zip2 v0.8h, v0.8h, v0.8h
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 2 0.50 abs d29, d24
+# CHECK-NEXT: 1 2 0.50 abs v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 abs v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.50 abs v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.50 abs v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.50 abs v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 abs v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.50 abs v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.50 add d17, d31, d29
+# CHECK-NEXT: 1 2 0.50 add v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.50 addhn v0.2s, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.50 addhn v0.4h, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 addhn v0.8b, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.50 addhn2 v0.16b, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.50 addhn2 v0.4s, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.50 addhn2 v0.8h, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 addp v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.50 addp v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 3 1.00 addv s0, v0.4s
+# CHECK-NEXT: 1 3 1.00 addv h0, v0.4h
+# CHECK-NEXT: 2 5 1.00 addv h0, v0.8h
+# CHECK-NEXT: 2 5 1.00 addv b0, v0.8b
+# CHECK-NEXT: 2 6 2.00 addv b0, v0.16b
+# CHECK-NEXT: 1 2 0.50 aesd v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 aese v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 aesimc v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 aesmc v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 and v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 3 1.00 bfcvt h0, s0
+# CHECK-NEXT: 2 4 2.00 bfcvtn v0.4h, v0.4s
+# CHECK-NEXT: 2 4 2.00 bfcvtn2 v0.8h, v0.4s
+# CHECK-NEXT: 1 5 0.50 bfdot v0.2s, v0.4h, v0.4h
+# CHECK-NEXT: 1 5 0.50 bfdot v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: 1 5 0.50 bfmlalb v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: 1 5 0.50 bfmlalb v0.4s, v0.8h, v0.h[3]
+# CHECK-NEXT: 1 5 0.50 bfmlalt v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: 1 5 0.50 bfmlalt v0.4s, v0.8h, v0.h[3]
+# CHECK-NEXT: 1 6 0.50 bfmmla v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.50 bic v0.4h, #15, lsl #8
+# CHECK-NEXT: 1 2 0.50 bic v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.50 bif v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 bit v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 bsl v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.50 cls v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 cls v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.50 cls v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.50 cls v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 cls v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.50 cls v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.50 clz v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 clz v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.50 clz v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.50 clz v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 clz v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.50 clz v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.50 cmeq d20, d21, #0
+# CHECK-NEXT: 1 2 0.50 cmeq d20, d21, d22
+# CHECK-NEXT: 1 2 0.50 cmeq v0.16b, v0.16b, #0
+# CHECK-NEXT: 1 2 0.50 cmeq v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 cmge d20, d21, #0
+# CHECK-NEXT: 1 2 0.50 cmge d20, d21, d22
+# CHECK-NEXT: 1 2 0.50 cmge v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.50 cmge v0.8b, v0.8b, #0
+# CHECK-NEXT: 1 2 0.50 cmgt d20, d21, #0
+# CHECK-NEXT: 1 2 0.50 cmgt d20, d21, d22
+# CHECK-NEXT: 1 2 0.50 cmgt v0.2s, v0.2s, #0
+# CHECK-NEXT: 1 2 0.50 cmgt v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 cmhi d20, d21, d22
+# CHECK-NEXT: 1 2 0.50 cmhi v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.50 cmhs d20, d21, d22
+# CHECK-NEXT: 1 2 0.50 cmhs v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.50 cmle d20, d21, #0
+# CHECK-NEXT: 1 2 0.50 cmle v0.2d, v0.2d, #0
+# CHECK-NEXT: 1 2 0.50 cmlt d20, d21, #0
+# CHECK-NEXT: 1 2 0.50 cmlt v0.8h, v0.8h, #0
+# CHECK-NEXT: 1 2 0.50 cmtst d20, d21, d22
+# CHECK-NEXT: 1 2 0.50 cmtst v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.50 cnt v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 cnt v0.8b, v0.8b
+# CHECK-NEXT: 1 3 1.00 dup v0.16b, w28
+# CHECK-NEXT: 1 3 1.00 dup v0.2d, x28
+# CHECK-NEXT: 1 3 1.00 dup v0.2s, w28
+# CHECK-NEXT: 1 3 1.00 dup v0.4h, w28
+# CHECK-NEXT: 1 3 1.00 dup v0.4s, w28
+# CHECK-NEXT: 1 3 1.00 dup v0.8b, w28
+# CHECK-NEXT: 1 3 1.00 dup v0.8h, w28
+# CHECK-NEXT: 1 2 0.50 mov b0, v0.b[1]
+# CHECK-NEXT: 1 2 0.50 mov d0, v0.d[1]
+# CHECK-NEXT: 1 2 0.50 mov h0, v0.h[1]
+# CHECK-NEXT: 1 2 0.50 mov s0, v0.s[1]
+# CHECK-NEXT: 1 2 0.50 dup v0.16b, v0.b[1]
+# CHECK-NEXT: 1 2 0.50 dup v0.2d, v0.d[1]
+# CHECK-NEXT: 1 2 0.50 dup v0.2s, v0.s[1]
+# CHECK-NEXT: 1 2 0.50 dup v0.4h, v0.h[1]
+# CHECK-NEXT: 1 2 0.50 dup v0.4s, v0.s[1]
+# CHECK-NEXT: 1 2 0.50 dup v0.8b, v0.b[1]
+# CHECK-NEXT: 1 2 0.50 dup v0.8h, v0.h[1]
+# CHECK-NEXT: 1 2 0.50 eor v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 ext v0.16b, v0.16b, v0.16b, #3
+# CHECK-NEXT: 1 2 0.50 ext v0.8b, v0.8b, v0.8b, #3
+# CHECK-NEXT: 1 2 0.50 fabd d29, d24, d20
+# CHECK-NEXT: 1 2 0.50 fabd s29, s24, s20
+# CHECK-NEXT: 1 2 0.50 fabd v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 fabs v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.50 fabs v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.50 fabs v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.50 fabs v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 fabs v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.50 facge d20, d21, d22
+# CHECK-NEXT: 1 2 0.50 facge s10, s11, s12
+# CHECK-NEXT: 1 2 0.50 facge v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 facgt d20, d21, d22
+# CHECK-NEXT: 1 2 0.50 facgt s10, s11, s12
+# CHECK-NEXT: 1 2 0.50 facgt v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.50 fadd v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 faddp v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.50 faddp v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 fcadd v0.2s, v0.2s, v0.2s, #90
+# CHECK-NEXT: 1 2 0.50 fcadd v0.4s, v0.4s, v0.4s, #270
+# CHECK-NEXT: 1 2 0.50 fcmeq d20, d21, #0.0
+# CHECK-NEXT: 1 2 0.50 fcmeq d20, d21, d22
+# CHECK-NEXT: 1 2 0.50 fcmeq s10, s11, #0.0
+# CHECK-NEXT: 1 2 0.50 fcmeq s10, s11, s12
+# CHECK-NEXT: 1 2 0.50 fcmeq v0.2s, v0.2s, #0.0
+# CHECK-NEXT: 1 2 0.50 fcmeq v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.50 fcmge d20, d21, #0.0
+# CHECK-NEXT: 1 2 0.50 fcmge d20, d21, d22
+# CHECK-NEXT: 1 2 0.50 fcmge s10, s11, #0.0
+# CHECK-NEXT: 1 2 0.50 fcmge s10, s11, s12
+# CHECK-NEXT: 1 2 0.50 fcmge v0.2d, v0.2d, #0.0
+# CHECK-NEXT: 1 2 0.50 fcmge v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 fcmgt d20, d21, #0.0
+# CHECK-NEXT: 1 2 0.50 fcmgt d20, d21, d22
+# CHECK-NEXT: 1 2 0.50 fcmgt s10, s11, #0.0
+# CHECK-NEXT: 1 2 0.50 fcmgt s10, s11, s12
+# CHECK-NEXT: 1 2 0.50 fcmgt v0.4s, v0.4s, #0.0
+# CHECK-NEXT: 1 2 0.50 fcmgt v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 4 0.50 fcmla v0.2s, v0.2s, v0.2s, #90
+# CHECK-NEXT: 1 4 0.50 fcmla v0.4s, v0.4s, v0.s[1], #0
+# CHECK-NEXT: 1 2 0.50 fcmle d20, d21, #0.0
+# CHECK-NEXT: 1 2 0.50 fcmle s10, s11, #0.0
+# CHECK-NEXT: 1 2 0.50 fcmle v0.2d, v0.2d, #0.0
+# CHECK-NEXT: 1 2 0.50 fcmlt d20, d21, #0.0
+# CHECK-NEXT: 1 2 0.50 fcmlt s10, s11, #0.0
+# CHECK-NEXT: 1 2 0.50 fcmlt v0.4s, v0.4s, #0.0
+# CHECK-NEXT: 1 3 1.00 fcvtas d21, d14
+# CHECK-NEXT: 2 4 2.00 fcvtas s12, s13
+# CHECK-NEXT: 4 6 4.00 fcvtas h12, h13
+# CHECK-NEXT: 1 3 1.00 fcvtas v0.2d, v0.2d
+# CHECK-NEXT: 1 3 1.00 fcvtas v0.2s, v0.2s
+# CHECK-NEXT: 2 4 2.00 fcvtas v0.4h, v0.4h
+# CHECK-NEXT: 2 4 2.00 fcvtas v0.4s, v0.4s
+# CHECK-NEXT: 4 6 4.00 fcvtas v0.8h, v0.8h
+# CHECK-NEXT: 1 3 1.00 fcvtau d21, d14
+# CHECK-NEXT: 2 4 2.00 fcvtau s12, s13
+# CHECK-NEXT: 4 6 4.00 fcvtau h12, h13
+# CHECK-NEXT: 1 3 1.00 fcvtau v0.2d, v0.2d
+# CHECK-NEXT: 1 3 1.00 fcvtau v0.2s, v0.2s
+# CHECK-NEXT: 2 4 2.00 fcvtau v0.4h, v0.4h
+# CHECK-NEXT: 2 4 2.00 fcvtau v0.4s, v0.4s
+# CHECK-NEXT: 4 6 4.00 fcvtau v0.8h, v0.8h
+# CHECK-NEXT: 1 3 1.00 fcvtl v0.2d, v0.2s
+# CHECK-NEXT: 2 4 2.00 fcvtl v0.4s, v0.4h
+# CHECK-NEXT: 1 3 1.00 fcvtl2 v0.2d, v0.4s
+# CHECK-NEXT: 2 4 2.00 fcvtl2 v0.4s, v0.8h
+# CHECK-NEXT: 1 3 1.00 fcvtms d21, d14
+# CHECK-NEXT: 2 4 2.00 fcvtms s22, s13
+# CHECK-NEXT: 4 6 4.00 fcvtms h22, h13
+# CHECK-NEXT: 1 3 1.00 fcvtms v0.2d, v0.2d
+# CHECK-NEXT: 1 3 1.00 fcvtms v0.2s, v0.2s
+# CHECK-NEXT: 2 4 2.00 fcvtms v0.4h, v0.4h
+# CHECK-NEXT: 2 4 2.00 fcvtms v0.4s, v0.4s
+# CHECK-NEXT: 4 6 4.00 fcvtms v0.8h, v0.8h
+# CHECK-NEXT: 1 3 1.00 fcvtmu d21, d14
+# CHECK-NEXT: 2 4 2.00 fcvtmu s12, s13
+# CHECK-NEXT: 4 6 4.00 fcvtmu h12, h13
+# CHECK-NEXT: 1 3 1.00 fcvtmu v0.2d, v0.2d
+# CHECK-NEXT: 1 3 1.00 fcvtmu v0.2s, v0.2s
+# CHECK-NEXT: 2 4 2.00 fcvtmu v0.4h, v0.4h
+# CHECK-NEXT: 2 4 2.00 fcvtmu v0.4s, v0.4s
+# CHECK-NEXT: 4 6 4.00 fcvtmu v0.8h, v0.8h
+# CHECK-NEXT: 1 3 1.00 fcvtn v0.2s, v0.2d
+# CHECK-NEXT: 2 4 2.00 fcvtn v0.4h, v0.4s
+# CHECK-NEXT: 1 3 1.00 fcvtn2 v0.4s, v0.2d
+# CHECK-NEXT: 2 4 2.00 fcvtn2 v0.8h, v0.4s
+# CHECK-NEXT: 1 3 1.00 fcvtns d21, d14
+# CHECK-NEXT: 2 4 2.00 fcvtns s22, s13
+# CHECK-NEXT: 4 6 4.00 fcvtns h22, h13
+# CHECK-NEXT: 1 3 1.00 fcvtns v0.2d, v0.2d
+# CHECK-NEXT: 1 3 1.00 fcvtns v0.2s, v0.2s
+# CHECK-NEXT: 2 4 2.00 fcvtns v0.4h, v0.4h
+# CHECK-NEXT: 2 4 2.00 fcvtns v0.4s, v0.4s
+# CHECK-NEXT: 4 6 4.00 fcvtns v0.8h, v0.8h
+# CHECK-NEXT: 1 3 1.00 fcvtnu d21, d14
+# CHECK-NEXT: 2 4 2.00 fcvtnu s12, s13
+# CHECK-NEXT: 4 6 4.00 fcvtnu h12, h13
+# CHECK-NEXT: 1 3 1.00 fcvtnu v0.2d, v0.2d
+# CHECK-NEXT: 1 3 1.00 fcvtnu v0.2s, v0.2s
+# CHECK-NEXT: 2 4 2.00 fcvtnu v0.4h, v0.4h
+# CHECK-NEXT: 2 4 2.00 fcvtnu v0.4s, v0.4s
+# CHECK-NEXT: 4 6 4.00 fcvtnu v0.8h, v0.8h
+# CHECK-NEXT: 1 3 1.00 fcvtps d21, d14
+# CHECK-NEXT: 2 4 2.00 fcvtps s22, s13
+# CHECK-NEXT: 4 6 4.00 fcvtps h22, h13
+# CHECK-NEXT: 1 3 1.00 fcvtps v0.2d, v0.2d
+# CHECK-NEXT: 1 3 1.00 fcvtps v0.2s, v0.2s
+# CHECK-NEXT: 2 4 2.00 fcvtps v0.4h, v0.4h
+# CHECK-NEXT: 2 4 2.00 fcvtps v0.4s, v0.4s
+# CHECK-NEXT: 4 6 4.00 fcvtps v0.8h, v0.8h
+# CHECK-NEXT: 1 3 1.00 fcvtpu d21, d14
+# CHECK-NEXT: 2 4 2.00 fcvtpu s12, s13
+# CHECK-NEXT: 4 6 4.00 fcvtpu h12, h13
+# CHECK-NEXT: 1 3 1.00 fcvtpu v0.2d, v0.2d
+# CHECK-NEXT: 1 3 1.00 fcvtpu v0.2s, v0.2s
+# CHECK-NEXT: 2 4 2.00 fcvtpu v0.4h, v0.4h
+# CHECK-NEXT: 2 4 2.00 fcvtpu v0.4s, v0.4s
+# CHECK-NEXT: 4 6 4.00 fcvtpu v0.8h, v0.8h
+# CHECK-NEXT: 1 3 0.50 fcvtxn s22, d13
+# CHECK-NEXT: 1 3 1.00 fcvtxn v0.2s, v0.2d
+# CHECK-NEXT: 1 3 1.00 fcvtxn2 v0.4s, v0.2d
+# CHECK-NEXT: 1 3 1.00 fcvtzs d21, d12, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzs d21, d14
+# CHECK-NEXT: 2 4 2.00 fcvtzs s12, s13
+# CHECK-NEXT: 2 4 2.00 fcvtzs s21, s12, #1
+# CHECK-NEXT: 4 6 4.00 fcvtzs h21, h14
+# CHECK-NEXT: 4 6 4.00 fcvtzs h21, h12, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzs v0.2d, v0.2d
+# CHECK-NEXT: 1 3 1.00 fcvtzs v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 3 1.00 fcvtzs v0.2s, v0.2s
+# CHECK-NEXT: 1 3 1.00 fcvtzs v0.2s, v0.2s, #3
+# CHECK-NEXT: 2 4 2.00 fcvtzs v0.4h, v0.4h
+# CHECK-NEXT: 2 4 2.00 fcvtzs v0.4s, v0.4s
+# CHECK-NEXT: 2 4 2.00 fcvtzs v0.4s, v0.4s, #3
+# CHECK-NEXT: 4 6 4.00 fcvtzs v0.8h, v0.8h
+# CHECK-NEXT: 1 3 1.00 fcvtzu d21, d12, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzu d21, d14
+# CHECK-NEXT: 2 4 2.00 fcvtzu s12, s13
+# CHECK-NEXT: 2 4 2.00 fcvtzu s21, s12, #1
+# CHECK-NEXT: 4 6 4.00 fcvtzu h12, h13
+# CHECK-NEXT: 4 6 4.00 fcvtzu h21, h12, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzu v0.2d, v0.2d
+# CHECK-NEXT: 1 3 1.00 fcvtzu v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 3 1.00 fcvtzu v0.2s, v0.2s
+# CHECK-NEXT: 1 3 1.00 fcvtzu v0.2s, v0.2s, #3
+# CHECK-NEXT: 2 4 2.00 fcvtzu v0.4h, v0.4h
+# CHECK-NEXT: 2 4 2.00 fcvtzu v0.4s, v0.4s
+# CHECK-NEXT: 2 4 2.00 fcvtzu v0.4s, v0.4s, #3
+# CHECK-NEXT: 4 6 4.00 fcvtzu v0.8h, v0.8h
+# CHECK-NEXT: 1 14 2.00 fdiv v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 9 2.00 fdiv v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 9 4.00 fdiv v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 11 4.00 fdiv v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 13 8.00 fdiv v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.50 fmax v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.50 fmax v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.50 fmax v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 fmaxnm v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.50 fmaxnm v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.50 fmaxnm v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 fmaxnmp v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.50 fmaxnmp v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.50 fmaxnmp v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 fmaxp v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.50 fmaxp v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.50 fmaxp v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 2 4 1.00 fmaxv h0, v0.4h
+# CHECK-NEXT: 3 6 1.50 fmaxv h0, v0.8h
+# CHECK-NEXT: 2 4 1.00 fmaxv s0, v0.4s
+# CHECK-NEXT: 1 2 0.50 fmin v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.50 fmin v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.50 fmin v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 fminnm v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.50 fminnm v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.50 fminnm v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 fminnmp v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.50 fminnmp v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.50 fminnmp v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 fminp v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.50 fminp v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.50 fminp v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 4 0.50 fmla d0, d1, v0.d[1]
+# CHECK-NEXT: 1 4 0.50 fmla s0, s1, v0.s[3]
+# CHECK-NEXT: 1 4 0.50 fmla v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 4 0.50 fmlal v0.2s, v0.2h, v0.h[1]
+# CHECK-NEXT: 1 4 0.50 fmlal v0.4s, v0.4h, v0.h[3]
+# CHECK-NEXT: 1 4 0.50 fmlal v0.2s, v0.2h, v0.2h
+# CHECK-NEXT: 1 4 0.50 fmlal v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: 1 4 0.50 fmlal2 v0.2s, v0.2h, v0.h[1]
+# CHECK-NEXT: 1 4 0.50 fmlal2 v0.4s, v0.4h, v0.h[3]
+# CHECK-NEXT: 1 4 0.50 fmlal2 v0.2s, v0.2h, v0.2h
+# CHECK-NEXT: 1 4 0.50 fmlal2 v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: 1 4 0.50 fmls d0, d4, v0.d[1]
+# CHECK-NEXT: 1 4 0.50 fmls s3, s5, v0.s[3]
+# CHECK-NEXT: 1 4 0.50 fmls v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 4 0.50 fmlsl v0.2s, v0.2h, v0.h[1]
+# CHECK-NEXT: 1 4 0.50 fmlsl v0.4s, v0.4h, v0.h[3]
+# CHECK-NEXT: 1 4 0.50 fmlsl v0.2s, v0.2h, v0.2h
+# CHECK-NEXT: 1 4 0.50 fmlsl v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: 1 4 0.50 fmlsl2 v0.2s, v0.2h, v0.h[1]
+# CHECK-NEXT: 1 4 0.50 fmlsl2 v0.4s, v0.4h, v0.h[3]
+# CHECK-NEXT: 1 4 0.50 fmlsl2 v0.2s, v0.2h, v0.2h
+# CHECK-NEXT: 1 4 0.50 fmlsl2 v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.50 fmov v0.2d, #-1.25000000
+# CHECK-NEXT: 1 2 0.50 fmov v0.2s, #13.00000000
+# CHECK-NEXT: 1 2 0.50 fmov v0.4s, #1.00000000
+# CHECK-NEXT: 1 3 0.50 fmul d0, d1, v0.d[1]
+# CHECK-NEXT: 1 3 0.50 fmul s0, s1, v0.s[3]
+# CHECK-NEXT: 1 3 0.50 fmul v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 3 0.50 fmulx d0, d4, v0.d[1]
+# CHECK-NEXT: 1 2 0.50 fmulx d23, d11, d1
+# CHECK-NEXT: 1 2 0.50 fmulx s20, s22, s15
+# CHECK-NEXT: 1 3 0.50 fmulx s3, s5, v0.s[3]
+# CHECK-NEXT: 1 3 0.50 fmulx v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.50 fmulx v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 3 0.50 fmulx v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 fneg v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.50 fneg v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.50 fneg v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.50 fneg v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 fneg v0.8h, v0.8h
+# CHECK-NEXT: 1 3 1.00 frecpe d13, d13
+# CHECK-NEXT: 1 3 1.00 frecpe s19, s14
+# CHECK-NEXT: 1 2 0.50 frecpe v0.2d, v0.2d
+# CHECK-NEXT: 1 3 1.00 frecpe v0.2s, v0.2s
+# CHECK-NEXT: 2 4 2.00 frecpe v0.4h, v0.4h
+# CHECK-NEXT: 2 4 2.00 frecpe v0.4s, v0.4s
+# CHECK-NEXT: 4 6 4.00 frecpe v0.8h, v0.8h
+# CHECK-NEXT: 1 4 0.50 frecps v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 4 0.50 frecps d22, d30, d21
+# CHECK-NEXT: 1 4 0.50 frecps s21, s16, s13
+# CHECK-NEXT: 1 3 1.00 frecpx d16, d19
+# CHECK-NEXT: 1 3 1.00 frecpx s18, s10
+# CHECK-NEXT: 1 3 1.00 frint32x v0.2d, v0.2d
+# CHECK-NEXT: 1 3 1.00 frint32x v0.2s, v0.2s
+# CHECK-NEXT: 2 4 2.00 frint32x v0.4s, v0.4s
+# CHECK-NEXT: 1 3 1.00 frint32z v0.2d, v0.2d
+# CHECK-NEXT: 1 3 1.00 frint32z v0.2s, v0.2s
+# CHECK-NEXT: 2 4 2.00 frint32z v0.4s, v0.4s
+# CHECK-NEXT: 1 3 1.00 frint64x v0.2d, v0.2d
+# CHECK-NEXT: 1 3 1.00 frint64x v0.2s, v0.2s
+# CHECK-NEXT: 2 4 2.00 frint64x v0.4s, v0.4s
+# CHECK-NEXT: 1 3 1.00 frint64z v0.2d, v0.2d
+# CHECK-NEXT: 1 3 1.00 frint64z v0.2s, v0.2s
+# CHECK-NEXT: 2 4 2.00 frint64z v0.4s, v0.4s
+# CHECK-NEXT: 1 3 1.00 frinta v0.2d, v0.2d
+# CHECK-NEXT: 1 3 1.00 frinta v0.2s, v0.2s
+# CHECK-NEXT: 2 4 2.00 frinta v0.4h, v0.4h
+# CHECK-NEXT: 2 4 2.00 frinta v0.4s, v0.4s
+# CHECK-NEXT: 4 6 4.00 frinta v0.8h, v0.8h
+# CHECK-NEXT: 1 3 1.00 frinti v0.2d, v0.2d
+# CHECK-NEXT: 1 3 1.00 frinti v0.2s, v0.2s
+# CHECK-NEXT: 2 4 2.00 frinti v0.4h, v0.4h
+# CHECK-NEXT: 2 4 2.00 frinti v0.4s, v0.4s
+# CHECK-NEXT: 4 6 4.00 frinti v0.8h, v0.8h
+# CHECK-NEXT: 1 3 1.00 frintm v0.2d, v0.2d
+# CHECK-NEXT: 1 3 1.00 frintm v0.2s, v0.2s
+# CHECK-NEXT: 2 4 2.00 frintm v0.4h, v0.4h
+# CHECK-NEXT: 2 4 2.00 frintm v0.4s, v0.4s
+# CHECK-NEXT: 4 6 4.00 frintm v0.8h, v0.8h
+# CHECK-NEXT: 1 3 1.00 frintn v0.2d, v0.2d
+# CHECK-NEXT: 1 3 1.00 frintn v0.2s, v0.2s
+# CHECK-NEXT: 2 4 2.00 frintn v0.4h, v0.4h
+# CHECK-NEXT: 2 4 2.00 frintn v0.4s, v0.4s
+# CHECK-NEXT: 4 6 4.00 frintn v0.8h, v0.8h
+# CHECK-NEXT: 1 3 1.00 frintp v0.2d, v0.2d
+# CHECK-NEXT: 1 3 1.00 frintp v0.2s, v0.2s
+# CHECK-NEXT: 2 4 2.00 frintp v0.4h, v0.4h
+# CHECK-NEXT: 2 4 2.00 frintp v0.4s, v0.4s
+# CHECK-NEXT: 4 6 4.00 frintp v0.8h, v0.8h
+# CHECK-NEXT: 1 3 1.00 frintx v0.2d, v0.2d
+# CHECK-NEXT: 1 3 1.00 frintx v0.2s, v0.2s
+# CHECK-NEXT: 2 4 2.00 frintx v0.4h, v0.4h
+# CHECK-NEXT: 2 4 2.00 frintx v0.4s, v0.4s
+# CHECK-NEXT: 4 6 4.00 frintx v0.8h, v0.8h
+# CHECK-NEXT: 1 3 1.00 frintz v0.2d, v0.2d
+# CHECK-NEXT: 1 3 1.00 frintz v0.2s, v0.2s
+# CHECK-NEXT: 2 4 2.00 frintz v0.4h, v0.4h
+# CHECK-NEXT: 2 4 2.00 frintz v0.4s, v0.4s
+# CHECK-NEXT: 4 6 4.00 frintz v0.8h, v0.8h
+# CHECK-NEXT: 1 3 1.00 frsqrte d21, d12
+# CHECK-NEXT: 1 3 1.00 frsqrte s22, s13
+# CHECK-NEXT: 1 2 0.50 frsqrte v0.2d, v0.2d
+# CHECK-NEXT: 1 3 1.00 frsqrte v0.2s, v0.2s
+# CHECK-NEXT: 2 4 2.00 frsqrte v0.4h, v0.4h
+# CHECK-NEXT: 2 4 2.00 frsqrte v0.4s, v0.4s
+# CHECK-NEXT: 4 6 4.00 frsqrte v0.8h, v0.8h
+# CHECK-NEXT: 1 4 0.50 frsqrts d8, d22, d18
+# CHECK-NEXT: 1 4 0.50 frsqrts s21, s5, s12
+# CHECK-NEXT: 1 4 0.50 frsqrts v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 14 2.00 fsqrt v0.2d, v0.2d
+# CHECK-NEXT: 1 9 2.00 fsqrt v0.2s, v0.2s
+# CHECK-NEXT: 1 9 4.00 fsqrt v0.4h, v0.4h
+# CHECK-NEXT: 1 11 4.00 fsqrt v0.4s, v0.4s
+# CHECK-NEXT: 1 13 8.00 fsqrt v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.50 fsub v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 6 0.33 * ld1 { v0.16b }, [x0]
+# CHECK-NEXT: 4 6 1.00 * ld1 { v0.2d, v1.2d, v2.2d }, [x0], #48
+# CHECK-NEXT: 4 7 1.33 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
+# CHECK-NEXT: 3 6 0.67 * ld1 { v0.4s, v1.4s }, [sp], #32
+# CHECK-NEXT: 3 6 1.00 * ld1 { v0.4s, v1.4s, v2.4s }, [sp]
+# CHECK-NEXT: 5 7 1.33 * ld1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3
+# CHECK-NEXT: 2 6 0.33 * ld1 { v0.8h }, [x15], x2
+# CHECK-NEXT: 2 6 0.67 * ld1 { v0.8h, v1.8h }, [x15]
+# CHECK-NEXT: 2 8 0.50 * ld1 { v0.b }[9], [x0]
+# CHECK-NEXT: 3 8 0.50 * ld1 { v0.b }[9], [x0], #1
+# CHECK-NEXT: 2 8 0.50 * ld1r { v0.16b }, [x0]
+# CHECK-NEXT: 3 8 0.50 * ld1r { v0.16b }, [x0], #1
+# CHECK-NEXT: 2 8 0.50 * ld1r { v0.8h }, [x15]
+# CHECK-NEXT: 3 8 0.50 * ld1r { v0.8h }, [x15], #2
+# CHECK-NEXT: 5 8 1.00 * ld2 { v0.16b, v1.16b }, [x0], x1
+# CHECK-NEXT: 3 8 1.00 * ld2 { v0.8b, v1.8b }, [x0]
+# CHECK-NEXT: 3 8 1.00 * ld2 { v0.h, v1.h }[7], [x15]
+# CHECK-NEXT: 4 8 1.00 * ld2 { v0.h, v1.h }[7], [x15], #4
+# CHECK-NEXT: 3 8 1.00 * ld2r { v0.2d, v1.2d }, [x0]
+# CHECK-NEXT: 4 8 1.00 * ld2r { v0.2d, v1.2d }, [x0], #16
+# CHECK-NEXT: 3 8 1.00 * ld2r { v0.4s, v1.4s }, [sp]
+# CHECK-NEXT: 4 8 1.00 * ld2r { v0.4s, v1.4s }, [sp], #8
+# CHECK-NEXT: 5 8 1.50 * ld3 { v0.4h, v1.4h, v2.4h }, [x15]
+# CHECK-NEXT: 7 8 1.50 * ld3 { v0.8h, v1.8h, v2.8h }, [x15], x2
+# CHECK-NEXT: 5 8 1.50 * ld3 { v0.s, v1.s, v2.s }[3], [sp]
+# CHECK-NEXT: 6 8 1.50 * ld3 { v0.s, v1.s, v2.s }[3], [sp], x3
+# CHECK-NEXT: 5 8 1.50 * ld3r { v0.4h, v1.4h, v2.4h }, [x15]
+# CHECK-NEXT: 6 8 1.50 * ld3r { v0.4h, v1.4h, v2.4h }, [x15], #6
+# CHECK-NEXT: 5 8 1.50 * ld3r { v0.8b, v1.8b, v2.8b }, [x0]
+# CHECK-NEXT: 6 8 1.50 * ld3r { v0.8b, v1.8b, v2.8b }, [x0], #3
+# CHECK-NEXT: 7 8 2.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
+# CHECK-NEXT: 11 9 2.00 * ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64
+# CHECK-NEXT: 7 8 2.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0]
+# CHECK-NEXT: 8 8 2.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0], #32
+# CHECK-NEXT: 8 8 2.00 * ld4 { v0.h, v1.h, v2.h, v3.h }[7], [x0], x0
+# CHECK-NEXT: 7 8 2.00 * ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [sp]
+# CHECK-NEXT: 8 8 2.00 * ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [sp], x7
+# CHECK-NEXT: 7 8 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
+# CHECK-NEXT: 8 8 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x30
+# CHECK-NEXT: 1 4 1.00 mla v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 4 1.00 mls v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.50 mov b0, v0.b[15]
+# CHECK-NEXT: 1 2 0.50 mov d6, v0.d[1]
+# CHECK-NEXT: 1 2 0.50 mov h2, v0.h[5]
+# CHECK-NEXT: 1 2 0.50 mov s17, v0.s[2]
+# CHECK-NEXT: 1 2 0.50 mov v2.b[0], v0.b[0]
+# CHECK-NEXT: 1 2 0.50 mov v2.h[1], v0.h[1]
+# CHECK-NEXT: 1 2 0.50 mov v2.s[2], v0.s[2]
+# CHECK-NEXT: 1 2 0.50 mov v2.d[1], v0.d[1]
+# CHECK-NEXT: 2 5 1.00 mov v0.b[0], w8
+# CHECK-NEXT: 2 5 1.00 mov v0.h[1], w8
+# CHECK-NEXT: 2 5 1.00 mov v0.s[2], w8
+# CHECK-NEXT: 2 5 1.00 mov v0.d[1], x8
+# CHECK-NEXT: 1 2 0.50 mov v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 mov v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.50 movi d15, #0xff00ff00ff00ff
+# CHECK-NEXT: 1 2 0.50 movi v0.16b, #31
+# CHECK-NEXT: 1 2 0.50 movi v0.2d, #0xff0000ff0000ffff
+# CHECK-NEXT: 1 2 0.50 movi v0.2s, #8, msl #8
+# CHECK-NEXT: 1 2 0.50 movi v0.4s, #255, lsl #24
+# CHECK-NEXT: 1 2 0.50 movi v0.8b, #255
+# CHECK-NEXT: 1 4 1.00 mul v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.50 mvni v0.2s, #0
+# CHECK-NEXT: 1 2 0.50 mvni v0.4s, #16, msl #16
+# CHECK-NEXT: 1 2 0.50 neg d29, d24
+# CHECK-NEXT: 1 2 0.50 neg v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 neg v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.50 neg v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.50 neg v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.50 neg v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 neg v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.50 neg v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.50 mvn v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 mvn v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.50 orn v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 mov v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 orr v0.8h, #31
+# CHECK-NEXT: 1 3 0.50 pmul v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 3 0.50 pmul v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 3 0.50 pmull v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: 1 3 0.50 pmull2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 raddhn v0.2s, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.50 raddhn v0.4h, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 raddhn v0.8b, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.50 raddhn2 v0.16b, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.50 raddhn2 v0.4s, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.50 raddhn2 v0.8h, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 rbit v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 rbit v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.50 rev16 v21.8b, v1.8b
+# CHECK-NEXT: 1 2 0.50 rev16 v30.16b, v31.16b
+# CHECK-NEXT: 1 2 0.50 rev32 v0.4h, v9.4h
+# CHECK-NEXT: 1 2 0.50 rev32 v21.8b, v1.8b
+# CHECK-NEXT: 1 2 0.50 rev32 v30.16b, v31.16b
+# CHECK-NEXT: 1 2 0.50 rev32 v4.8h, v7.8h
+# CHECK-NEXT: 1 2 0.50 rev64 v0.16b, v31.16b
+# CHECK-NEXT: 1 2 0.50 rev64 v1.8b, v9.8b
+# CHECK-NEXT: 1 2 0.50 rev64 v13.4h, v21.4h
+# CHECK-NEXT: 1 2 0.50 rev64 v2.8h, v4.8h
+# CHECK-NEXT: 1 2 0.50 rev64 v4.2s, v0.2s
+# CHECK-NEXT: 1 2 0.50 rev64 v6.4s, v8.4s
+# CHECK-NEXT: 1 4 0.50 rshrn v0.2s, v0.2d, #3
+# CHECK-NEXT: 1 4 0.50 rshrn v0.4h, v0.4s, #3
+# CHECK-NEXT: 1 4 0.50 rshrn v0.8b, v0.8h, #3
+# CHECK-NEXT: 1 4 0.50 rshrn2 v0.16b, v0.8h, #3
+# CHECK-NEXT: 1 4 0.50 rshrn2 v0.4s, v0.2d, #3
+# CHECK-NEXT: 1 4 0.50 rshrn2 v0.8h, v0.4s, #3
+# CHECK-NEXT: 1 2 0.50 rsubhn v0.2s, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.50 rsubhn v0.4h, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 rsubhn v0.8b, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.50 rsubhn2 v0.16b, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.50 rsubhn2 v0.4s, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.50 rsubhn2 v0.8h, v0.4s, v0.4s
+# CHECK-NEXT: 1 4 0.50 saba v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 4 0.50 sabal v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: 1 4 0.50 sabal v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: 1 4 0.50 sabal v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: 1 4 0.50 sabal2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: 1 4 0.50 sabal2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: 1 4 0.50 sabal2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 sabd v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.50 sabdl v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.50 sabdl v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.50 sabdl v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.50 sabdl2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 sabdl2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.50 sabdl2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: 1 4 0.50 sadalp v0.1d, v0.2s
+# CHECK-NEXT: 1 4 0.50 sadalp v0.2d, v0.4s
+# CHECK-NEXT: 1 4 0.50 sadalp v0.2s, v0.4h
+# CHECK-NEXT: 1 4 0.50 sadalp v0.4h, v0.8b
+# CHECK-NEXT: 1 4 0.50 sadalp v0.4s, v0.8h
+# CHECK-NEXT: 1 4 0.50 sadalp v0.8h, v0.16b
+# CHECK-NEXT: 1 2 0.50 saddl v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.50 saddl v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.50 saddl v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.50 saddl2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 saddl2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.50 saddl2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 saddlp v0.1d, v0.2s
+# CHECK-NEXT: 1 2 0.50 saddlp v0.2d, v0.4s
+# CHECK-NEXT: 1 2 0.50 saddlp v0.2s, v0.4h
+# CHECK-NEXT: 1 2 0.50 saddlp v0.4h, v0.8b
+# CHECK-NEXT: 1 2 0.50 saddlp v0.4s, v0.8h
+# CHECK-NEXT: 1 2 0.50 saddlp v0.8h, v0.16b
+# CHECK-NEXT: 1 3 1.00 saddlv d0, v0.4s
+# CHECK-NEXT: 1 3 1.00 saddlv s0, v0.4h
+# CHECK-NEXT: 2 5 1.00 saddlv s0, v0.8h
+# CHECK-NEXT: 2 5 1.00 saddlv h0, v0.8b
+# CHECK-NEXT: 2 6 2.00 saddlv h0, v0.16b
+# CHECK-NEXT: 1 2 0.50 saddw v0.2d, v0.2d, v0.2s
+# CHECK-NEXT: 1 2 0.50 saddw v0.4s, v0.4s, v0.4h
+# CHECK-NEXT: 1 2 0.50 saddw v0.8h, v0.8h, v0.8b
+# CHECK-NEXT: 1 2 0.50 saddw2 v0.2d, v0.2d, v0.4s
+# CHECK-NEXT: 1 2 0.50 saddw2 v0.4s, v0.4s, v0.8h
+# CHECK-NEXT: 1 2 0.50 saddw2 v0.8h, v0.8h, v0.16b
+# CHECK-NEXT: 1 3 1.00 scvtf d21, d12
+# CHECK-NEXT: 1 3 1.00 scvtf d21, d12, #64
+# CHECK-NEXT: 2 4 2.00 scvtf s22, s13
+# CHECK-NEXT: 2 4 2.00 scvtf s22, s13, #32
+# CHECK-NEXT: 1 3 1.00 scvtf v0.2d, v0.2d
+# CHECK-NEXT: 1 3 1.00 scvtf v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 3 1.00 scvtf v0.2s, v0.2s
+# CHECK-NEXT: 1 3 1.00 scvtf v0.2s, v0.2s, #3
+# CHECK-NEXT: 2 4 2.00 scvtf v0.4h, v0.4h
+# CHECK-NEXT: 2 4 2.00 scvtf v0.4s, v0.4s
+# CHECK-NEXT: 2 4 2.00 scvtf v0.4s, v0.4s, #3
+# CHECK-NEXT: 4 6 4.00 scvtf v0.8h, v0.8h
+# CHECK-NEXT: 1 3 0.50 sdot v0.2s, v0.8b, v0.4b[2]
+# CHECK-NEXT: 1 3 0.50 sdot v0.2s, v0.8b, v0.8b
+# CHECK-NEXT: 1 3 0.50 sdot v0.4s, v0.16b, v0.16b
+# CHECK-NEXT: 1 3 0.50 sdot v0.4s, v0.16b, v0.4b[2]
+# CHECK-NEXT: 1 2 0.50 shadd v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.50 shl d7, d10, #12
+# CHECK-NEXT: 1 2 0.50 shl v0.16b, v0.16b, #3
+# CHECK-NEXT: 1 2 0.50 shl v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 2 0.50 shl v0.4h, v0.4h, #3
+# CHECK-NEXT: 1 2 0.50 shl v0.4s, v0.4s, #3
+# CHECK-NEXT: 1 2 0.50 shll v0.2d, v0.2s, #32
+# CHECK-NEXT: 1 2 0.50 shll v0.4s, v0.4h, #16
+# CHECK-NEXT: 1 2 0.50 shll v0.8h, v0.8b, #8
+# CHECK-NEXT: 1 2 0.50 shll v0.2d, v0.2s, #32
+# CHECK-NEXT: 1 2 0.50 shll v0.4s, v0.4h, #16
+# CHECK-NEXT: 1 2 0.50 shll v0.8h, v0.8b, #8
+# CHECK-NEXT: 1 2 0.50 shll2 v0.2d, v0.4s, #32
+# CHECK-NEXT: 1 2 0.50 shll2 v0.4s, v0.8h, #16
+# CHECK-NEXT: 1 2 0.50 shll2 v0.8h, v0.16b, #8
+# CHECK-NEXT: 1 2 0.50 shll2 v0.2d, v0.4s, #32
+# CHECK-NEXT: 1 2 0.50 shll2 v0.4s, v0.8h, #16
+# CHECK-NEXT: 1 2 0.50 shll2 v0.8h, v0.16b, #8
+# CHECK-NEXT: 1 2 0.50 shrn v0.2s, v0.2d, #3
+# CHECK-NEXT: 1 2 0.50 shrn v0.4h, v0.4s, #3
+# CHECK-NEXT: 1 2 0.50 shrn v0.8b, v0.8h, #3
+# CHECK-NEXT: 1 2 0.50 shrn2 v0.16b, v0.8h, #3
+# CHECK-NEXT: 1 2 0.50 shrn2 v0.4s, v0.2d, #3
+# CHECK-NEXT: 1 2 0.50 shrn2 v0.8h, v0.4s, #3
+# CHECK-NEXT: 1 2 0.50 shsub v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.50 shsub v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.50 sli d10, d14, #12
+# CHECK-NEXT: 1 2 0.50 sli v0.16b, v0.16b, #3
+# CHECK-NEXT: 1 2 0.50 sli v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 2 0.50 sli v0.2s, v0.2s, #3
+# CHECK-NEXT: 1 2 0.50 sli v0.4h, v0.4h, #3
+# CHECK-NEXT: 1 2 0.50 sli v0.4s, v0.4s, #3
+# CHECK-NEXT: 1 2 0.50 sli v0.8b, v0.8b, #3
+# CHECK-NEXT: 1 2 0.50 sli v0.8h, v0.8h, #3
+# CHECK-NEXT: 1 2 0.50 smax v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.50 smax v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.50 smax v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.50 smaxp v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.50 smaxp v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.50 smaxp v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 2 5 1.00 smaxv b0, v0.8b
+# CHECK-NEXT: 2 6 2.00 smaxv b0, v0.16b
+# CHECK-NEXT: 1 3 1.00 smaxv h0, v0.4h
+# CHECK-NEXT: 2 5 1.00 smaxv h0, v0.8h
+# CHECK-NEXT: 1 3 1.00 smaxv s0, v0.4s
+# CHECK-NEXT: 1 2 0.50 smin v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 smin v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 smin v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.50 sminp v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 sminp v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 sminp v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: 2 5 1.00 sminv b0, v0.8b
+# CHECK-NEXT: 2 6 2.00 sminv b0, v0.16b
+# CHECK-NEXT: 1 3 1.00 sminv h0, v0.4h
+# CHECK-NEXT: 2 5 1.00 sminv h0, v0.8h
+# CHECK-NEXT: 1 3 1.00 sminv s0, v0.4s
+# CHECK-NEXT: 1 4 1.00 smlal v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: 1 4 1.00 smlal v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: 1 4 1.00 smlal v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: 1 4 1.00 smlal2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: 1 4 1.00 smlal2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: 1 4 1.00 smlal2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: 1 4 1.00 smlsl v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: 1 4 1.00 smlsl v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: 1 4 1.00 smlsl v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: 1 4 1.00 smlsl2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: 1 4 1.00 smlsl2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: 1 4 1.00 smlsl2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: 1 3 0.50 smmla v0.4s, v0.16b, v0.16b
+# CHECK-NEXT: 1 3 1.00 smull v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: 1 3 1.00 smull v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: 1 3 1.00 smull v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: 1 3 1.00 smull2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: 1 3 1.00 smull2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: 1 3 1.00 smull2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 sqabs b19, b14
+# CHECK-NEXT: 1 2 0.50 sqabs d18, d12
+# CHECK-NEXT: 1 2 0.50 sqabs h21, h15
+# CHECK-NEXT: 1 2 0.50 sqabs s20, s12
+# CHECK-NEXT: 1 2 0.50 sqabs v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 sqabs v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.50 sqabs v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.50 sqabs v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.50 sqabs v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 sqabs v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.50 sqabs v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.50 sqadd b20, b11, b15
+# CHECK-NEXT: 1 2 0.50 sqadd v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 sqadd v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 4 1.00 sqdmlal d19, s24, s12
+# CHECK-NEXT: 1 4 1.00 sqdmlal d8, s9, v0.s[1]
+# CHECK-NEXT: 1 4 1.00 sqdmlal s0, h0, v0.h[3]
+# CHECK-NEXT: 1 4 1.00 sqdmlal s17, h27, h12
+# CHECK-NEXT: 1 4 1.00 sqdmlal v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: 1 4 1.00 sqdmlal v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: 1 4 1.00 sqdmlal2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: 1 4 1.00 sqdmlal2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: 1 4 1.00 sqdmlsl d12, s23, s13
+# CHECK-NEXT: 1 4 1.00 sqdmlsl d8, s9, v0.s[1]
+# CHECK-NEXT: 1 4 1.00 sqdmlsl s0, h0, v0.h[3]
+# CHECK-NEXT: 1 4 1.00 sqdmlsl s14, h12, h25
+# CHECK-NEXT: 1 4 1.00 sqdmlsl v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: 1 4 1.00 sqdmlsl v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: 1 4 1.00 sqdmlsl2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: 1 4 1.00 sqdmlsl2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: 1 4 1.00 sqdmulh h10, h11, h12
+# CHECK-NEXT: 1 4 1.00 sqdmulh h7, h15, v0.h[3]
+# CHECK-NEXT: 1 4 1.00 sqdmulh s15, s14, v0.s[1]
+# CHECK-NEXT: 1 4 1.00 sqdmulh s20, s21, s2
+# CHECK-NEXT: 1 4 1.00 sqdmulh v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 4 1.00 sqdmulh v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 3 1.00 sqdmull d1, s1, v0.s[1]
+# CHECK-NEXT: 1 3 1.00 sqdmull d15, s22, s12
+# CHECK-NEXT: 1 3 1.00 sqdmull s1, h1, v0.h[3]
+# CHECK-NEXT: 1 3 1.00 sqdmull s12, h22, h12
+# CHECK-NEXT: 1 3 1.00 sqdmull v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: 1 3 1.00 sqdmull v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: 1 3 1.00 sqdmull2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: 1 3 1.00 sqdmull2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.50 sqneg b19, b14
+# CHECK-NEXT: 1 2 0.50 sqneg d18, d12
+# CHECK-NEXT: 1 2 0.50 sqneg h21, h15
+# CHECK-NEXT: 1 2 0.50 sqneg s20, s12
+# CHECK-NEXT: 1 2 0.50 sqneg v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 sqneg v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.50 sqneg v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.50 sqneg v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.50 sqneg v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 sqneg v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.50 sqneg v0.8h, v0.8h
+# CHECK-NEXT: 1 4 2.00 sqrdmlah h0, h1, v2.h[3]
+# CHECK-NEXT: 1 4 2.00 sqrdmlah v0.4h, v1.4h, v2.h[3]
+# CHECK-NEXT: 1 4 2.00 sqrdmlah v0.8h, v1.8h, v2.h[3]
+# CHECK-NEXT: 1 4 2.00 sqrdmlah s0, s1, v2.s[1]
+# CHECK-NEXT: 1 4 2.00 sqrdmlah v0.2s, v1.2s, v2.s[1]
+# CHECK-NEXT: 1 4 2.00 sqrdmlah v0.4s, v1.4s, v2.s[1]
+# CHECK-NEXT: 1 4 2.00 sqrdmlah h0, h1, h2
+# CHECK-NEXT: 1 4 2.00 sqrdmlah v0.4h, v1.4h, v2.4h
+# CHECK-NEXT: 1 4 2.00 sqrdmlah v0.8h, v1.8h, v2.8h
+# CHECK-NEXT: 1 4 2.00 sqrdmlah s0, s1, s2
+# CHECK-NEXT: 1 4 2.00 sqrdmlah v0.2s, v1.2s, v2.2s
+# CHECK-NEXT: 1 4 2.00 sqrdmlah v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: 1 4 2.00 sqrdmlsh h0, h1, v2.h[3]
+# CHECK-NEXT: 1 4 2.00 sqrdmlsh v0.4h, v1.4h, v2.h[3]
+# CHECK-NEXT: 1 4 2.00 sqrdmlsh v0.8h, v1.8h, v2.h[3]
+# CHECK-NEXT: 1 4 2.00 sqrdmlsh s0, s1, v2.s[1]
+# CHECK-NEXT: 1 4 2.00 sqrdmlsh v0.2s, v1.2s, v2.s[1]
+# CHECK-NEXT: 1 4 2.00 sqrdmlsh v0.4s, v1.4s, v2.s[1]
+# CHECK-NEXT: 1 4 2.00 sqrdmlsh h0, h1, h2
+# CHECK-NEXT: 1 4 2.00 sqrdmlsh v0.4h, v1.4h, v2.4h
+# CHECK-NEXT: 1 4 2.00 sqrdmlsh v0.8h, v1.8h, v2.8h
+# CHECK-NEXT: 1 4 2.00 sqrdmlsh s0, s1, s2
+# CHECK-NEXT: 1 4 2.00 sqrdmlsh v0.2s, v1.2s, v2.2s
+# CHECK-NEXT: 1 4 2.00 sqrdmlsh v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: 1 4 1.00 sqrdmulh h10, h11, h12
+# CHECK-NEXT: 1 4 1.00 sqrdmulh h7, h15, v0.h[3]
+# CHECK-NEXT: 1 4 1.00 sqrdmulh s15, s14, v0.s[1]
+# CHECK-NEXT: 1 4 1.00 sqrdmulh s20, s21, s2
+# CHECK-NEXT: 1 4 1.00 sqrdmulh v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 4 1.00 sqrdmulh v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: 1 4 0.50 sqrshl d31, d31, d31
+# CHECK-NEXT: 1 4 0.50 sqrshl h3, h4, h15
+# CHECK-NEXT: 1 4 0.50 sqrshl v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 4 0.50 sqrshl v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 4 0.50 sqrshl v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 4 0.50 sqrshrn b10, h13, #2
+# CHECK-NEXT: 1 4 0.50 sqrshrn h15, s10, #6
+# CHECK-NEXT: 1 4 0.50 sqrshrn s15, d12, #9
+# CHECK-NEXT: 1 4 0.50 sqrshrn v0.2s, v0.2d, #3
+# CHECK-NEXT: 1 4 0.50 sqrshrn v0.4h, v0.4s, #3
+# CHECK-NEXT: 1 4 0.50 sqrshrn v0.8b, v0.8h, #3
+# CHECK-NEXT: 1 4 0.50 sqrshrn2 v0.16b, v0.8h, #3
+# CHECK-NEXT: 1 4 0.50 sqrshrn2 v0.4s, v0.2d, #3
+# CHECK-NEXT: 1 4 0.50 sqrshrn2 v0.8h, v0.4s, #3
+# CHECK-NEXT: 1 4 0.50 sqrshrun b17, h10, #6
+# CHECK-NEXT: 1 4 0.50 sqrshrun h10, s13, #15
+# CHECK-NEXT: 1 4 0.50 sqrshrun s22, d16, #31
+# CHECK-NEXT: 1 4 0.50 sqrshrun v0.2s, v0.2d, #3
+# CHECK-NEXT: 1 4 0.50 sqrshrun v0.4h, v0.4s, #3
+# CHECK-NEXT: 1 4 0.50 sqrshrun v0.8b, v0.8h, #3
+# CHECK-NEXT: 1 4 0.50 sqrshrun2 v0.16b, v0.8h, #3
+# CHECK-NEXT: 1 4 0.50 sqrshrun2 v0.4s, v0.2d, #3
+# CHECK-NEXT: 1 4 0.50 sqrshrun2 v0.8h, v0.4s, #3
+# CHECK-NEXT: 1 4 0.50 sqshl b11, b19, #7
+# CHECK-NEXT: 1 4 0.50 sqshl d15, d16, #51
+# CHECK-NEXT: 1 4 0.50 sqshl d31, d31, d31
+# CHECK-NEXT: 1 4 0.50 sqshl h13, h18, #11
+# CHECK-NEXT: 1 4 0.50 sqshl h3, h4, h15
+# CHECK-NEXT: 1 4 0.50 sqshl s14, s17, #22
+# CHECK-NEXT: 1 4 0.50 sqshl v0.16b, v0.16b, #3
+# CHECK-NEXT: 1 4 0.50 sqshl v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 4 0.50 sqshl v0.2s, v0.2s, #3
+# CHECK-NEXT: 1 4 0.50 sqshl v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 4 0.50 sqshl v0.4h, v0.4h, #3
+# CHECK-NEXT: 1 4 0.50 sqshl v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 4 0.50 sqshl v0.4s, v0.4s, #3
+# CHECK-NEXT: 1 4 0.50 sqshl v0.8b, v0.8b, #3
+# CHECK-NEXT: 1 4 0.50 sqshl v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 4 0.50 sqshl v0.8h, v0.8h, #3
+# CHECK-NEXT: 1 4 0.50 sqshlu b15, b18, #6
+# CHECK-NEXT: 1 4 0.50 sqshlu d11, d13, #32
+# CHECK-NEXT: 1 4 0.50 sqshlu h19, h17, #6
+# CHECK-NEXT: 1 4 0.50 sqshlu s16, s14, #25
+# CHECK-NEXT: 1 4 0.50 sqshlu v0.16b, v0.16b, #3
+# CHECK-NEXT: 1 4 0.50 sqshlu v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 4 0.50 sqshlu v0.2s, v0.2s, #3
+# CHECK-NEXT: 1 4 0.50 sqshlu v0.4h, v0.4h, #3
+# CHECK-NEXT: 1 4 0.50 sqshlu v0.4s, v0.4s, #3
+# CHECK-NEXT: 1 4 0.50 sqshlu v0.8b, v0.8b, #3
+# CHECK-NEXT: 1 4 0.50 sqshlu v0.8h, v0.8h, #3
+# CHECK-NEXT: 1 4 0.50 sqshrn b10, h15, #5
+# CHECK-NEXT: 1 4 0.50 sqshrn h17, s10, #4
+# CHECK-NEXT: 1 4 0.50 sqshrn s18, d10, #31
+# CHECK-NEXT: 1 4 0.50 sqshrn v0.2s, v0.2d, #3
+# CHECK-NEXT: 1 4 0.50 sqshrn v0.4h, v0.4s, #3
+# CHECK-NEXT: 1 4 0.50 sqshrn v0.8b, v0.8h, #3
+# CHECK-NEXT: 1 4 0.50 sqshrn2 v0.16b, v0.8h, #3
+# CHECK-NEXT: 1 4 0.50 sqshrn2 v0.4s, v0.2d, #3
+# CHECK-NEXT: 1 4 0.50 sqshrn2 v0.8h, v0.4s, #3
+# CHECK-NEXT: 1 4 0.50 sqshrun b15, h10, #7
+# CHECK-NEXT: 1 4 0.50 sqshrun h20, s14, #3
+# CHECK-NEXT: 1 4 0.50 sqshrun s10, d15, #15
+# CHECK-NEXT: 1 4 0.50 sqshrun v0.2s, v0.2d, #3
+# CHECK-NEXT: 1 4 0.50 sqshrun v0.4h, v0.4s, #3
+# CHECK-NEXT: 1 4 0.50 sqshrun v0.8b, v0.8h, #3
+# CHECK-NEXT: 1 4 0.50 sqshrun2 v0.16b, v0.8h, #3
+# CHECK-NEXT: 1 4 0.50 sqshrun2 v0.4s, v0.2d, #3
+# CHECK-NEXT: 1 4 0.50 sqshrun2 v0.8h, v0.4s, #3
+# CHECK-NEXT: 1 2 0.50 sqsub s20, s10, s7
+# CHECK-NEXT: 1 2 0.50 sqsub v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.50 sqsub v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 sqsub v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 4 0.50 sqxtn b18, h18
+# CHECK-NEXT: 1 4 0.50 sqxtn h20, s17
+# CHECK-NEXT: 1 4 0.50 sqxtn s19, d14
+# CHECK-NEXT: 1 4 0.50 sqxtn v0.2s, v0.2d
+# CHECK-NEXT: 1 4 0.50 sqxtn v0.4h, v0.4s
+# CHECK-NEXT: 1 4 0.50 sqxtn v0.8b, v0.8h
+# CHECK-NEXT: 1 4 0.50 sqxtn2 v0.16b, v0.8h
+# CHECK-NEXT: 1 4 0.50 sqxtn2 v0.4s, v0.2d
+# CHECK-NEXT: 1 4 0.50 sqxtn2 v0.8h, v0.4s
+# CHECK-NEXT: 1 4 0.50 sqxtun b19, h14
+# CHECK-NEXT: 1 4 0.50 sqxtun h21, s15
+# CHECK-NEXT: 1 4 0.50 sqxtun s20, d12
+# CHECK-NEXT: 1 4 0.50 sqxtun v0.2s, v0.2d
+# CHECK-NEXT: 1 4 0.50 sqxtun v0.4h, v0.4s
+# CHECK-NEXT: 1 4 0.50 sqxtun v0.8b, v0.8h
+# CHECK-NEXT: 1 4 0.50 sqxtun2 v0.16b, v0.8h
+# CHECK-NEXT: 1 4 0.50 sqxtun2 v0.4s, v0.2d
+# CHECK-NEXT: 1 4 0.50 sqxtun2 v0.8h, v0.4s
+# CHECK-NEXT: 1 2 0.50 srhadd v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.50 srhadd v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.50 srhadd v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.50 sri d10, d12, #14
+# CHECK-NEXT: 1 2 0.50 sri v0.16b, v0.16b, #3
+# CHECK-NEXT: 1 2 0.50 sri v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 2 0.50 sri v0.2s, v0.2s, #3
+# CHECK-NEXT: 1 2 0.50 sri v0.4h, v0.4h, #3
+# CHECK-NEXT: 1 2 0.50 sri v0.4s, v0.4s, #3
+# CHECK-NEXT: 1 2 0.50 sri v0.8b, v0.8b, #3
+# CHECK-NEXT: 1 2 0.50 sri v0.8h, v0.8h, #3
+# CHECK-NEXT: 1 4 0.50 srshl d16, d16, d16
+# CHECK-NEXT: 1 4 0.50 srshl v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 4 0.50 srshl v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 4 0.50 srshl v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 4 0.50 srshr d19, d18, #7
+# CHECK-NEXT: 1 4 0.50 srshr v0.16b, v0.16b, #3
+# CHECK-NEXT: 1 4 0.50 srshr v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 4 0.50 srshr v0.2s, v0.2s, #3
+# CHECK-NEXT: 1 4 0.50 srshr v0.4h, v0.4h, #3
+# CHECK-NEXT: 1 4 0.50 srshr v0.4s, v0.4s, #3
+# CHECK-NEXT: 1 4 0.50 srshr v0.8b, v0.8b, #3
+# CHECK-NEXT: 1 4 0.50 srshr v0.8h, v0.8h, #3
+# CHECK-NEXT: 1 4 0.50 srsra d15, d11, #19
+# CHECK-NEXT: 1 4 0.50 srsra v0.16b, v0.16b, #3
+# CHECK-NEXT: 1 4 0.50 srsra v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 4 0.50 srsra v0.2s, v0.2s, #3
+# CHECK-NEXT: 1 4 0.50 srsra v0.4h, v0.4h, #3
+# CHECK-NEXT: 1 4 0.50 srsra v0.4s, v0.4s, #3
+# CHECK-NEXT: 1 4 0.50 srsra v0.8b, v0.8b, #3
+# CHECK-NEXT: 1 4 0.50 srsra v0.8h, v0.8h, #3
+# CHECK-NEXT: 1 2 0.50 sshl d31, d31, d31
+# CHECK-NEXT: 1 2 0.50 sshl v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.50 sshl v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.50 sshl v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.50 sshl v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.50 sshll v0.2d, v0.2s, #3
+# CHECK-NEXT: 1 2 0.50 sshll2 v0.4s, v0.8h, #3
+# CHECK-NEXT: 1 2 0.50 sshr d15, d16, #12
+# CHECK-NEXT: 1 2 0.50 sshr v0.16b, v0.16b, #3
+# CHECK-NEXT: 1 2 0.50 sshr v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 2 0.50 sshr v0.2s, v0.2s, #3
+# CHECK-NEXT: 1 2 0.50 sshr v0.4h, v0.4h, #3
+# CHECK-NEXT: 1 2 0.50 sshr v0.4s, v0.4s, #3
+# CHECK-NEXT: 1 2 0.50 sshr v0.8b, v0.8b, #3
+# CHECK-NEXT: 1 2 0.50 sshr v0.8h, v0.8h, #3
+# CHECK-NEXT: 1 4 0.50 ssra d18, d12, #21
+# CHECK-NEXT: 1 4 0.50 ssra v0.16b, v0.16b, #3
+# CHECK-NEXT: 1 4 0.50 ssra v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 4 0.50 ssra v0.2s, v0.2s, #3
+# CHECK-NEXT: 1 4 0.50 ssra v0.4h, v0.4h, #3
+# CHECK-NEXT: 1 4 0.50 ssra v0.4s, v0.4s, #3
+# CHECK-NEXT: 1 4 0.50 ssra v0.8b, v0.8b, #3
+# CHECK-NEXT: 1 4 0.50 ssra v0.8h, v0.8h, #3
+# CHECK-NEXT: 1 2 0.50 ssubl v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.50 ssubl v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.50 ssubl v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.50 ssubl2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 ssubl2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.50 ssubl2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 ssubw v0.2d, v0.2d, v0.2s
+# CHECK-NEXT: 1 2 0.50 ssubw v0.4s, v0.4s, v0.4h
+# CHECK-NEXT: 1 2 0.50 ssubw v0.8h, v0.8h, v0.8b
+# CHECK-NEXT: 1 2 0.50 ssubw2 v0.2d, v0.2d, v0.4s
+# CHECK-NEXT: 1 2 0.50 ssubw2 v0.4s, v0.4s, v0.8h
+# CHECK-NEXT: 1 2 0.50 ssubw2 v0.8h, v0.8h, v0.16b
+# CHECK-NEXT: 2 2 0.50 * st1 { v0.16b }, [x0]
+# CHECK-NEXT: 7 2 1.50 * st1 { v0.2d, v1.2d, v2.2d }, [x0], #48
+# CHECK-NEXT: 8 2 2.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
+# CHECK-NEXT: 5 2 1.00 * st1 { v0.4s, v1.4s }, [sp], #32
+# CHECK-NEXT: 6 2 1.50 * st1 { v0.4s, v1.4s, v2.4s }, [sp]
+# CHECK-NEXT: 5 2 1.00 * st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3
+# CHECK-NEXT: 3 2 0.50 * st1 { v0.8h }, [x15], x2
+# CHECK-NEXT: 4 2 1.00 * st1 { v0.8h, v1.8h }, [x15]
+# CHECK-NEXT: 3 4 1.00 * st1 { v0.d }[1], [x0]
+# CHECK-NEXT: 4 4 1.00 * st1 { v0.d }[1], [x0], #8
+# CHECK-NEXT: 7 4 2.00 * st2 { v0.16b, v1.16b }, [x0], x1
+# CHECK-NEXT: 3 4 1.00 * st2 { v0.8b, v1.8b }, [x0]
+# CHECK-NEXT: 3 4 1.00 * st2 { v0.s, v1.s }[3], [sp]
+# CHECK-NEXT: 4 4 1.00 * st2 { v0.s, v1.s }[3], [sp], #8
+# CHECK-NEXT: 6 5 2.00 * st3 { v0.4h, v1.4h, v2.4h }, [x15]
+# CHECK-NEXT: 10 6 3.00 * st3 { v0.8h, v1.8h, v2.8h }, [x15], x2
+# CHECK-NEXT: 6 5 2.00 * st3 { v0.h, v1.h, v2.h }[7], [x15]
+# CHECK-NEXT: 7 5 2.00 * st3 { v0.h, v1.h, v2.h }[7], [x15], #6
+# CHECK-NEXT: 8 6 3.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
+# CHECK-NEXT: 17 7 6.00 * st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64
+# CHECK-NEXT: 4 6 1.50 * st4 { v0.b, v1.b, v2.b, v3.b }[9], [x0]
+# CHECK-NEXT: 7 4 2.00 * st4 { v0.d, v1.d, v2.d, v3.d }[1], [x0], x5
+# CHECK-NEXT: 1 2 0.50 sub d15, d5, d16
+# CHECK-NEXT: 1 2 0.50 sub v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 3 0.50 sudot v0.2s, v0.8b, v0.4b[2]
+# CHECK-NEXT: 1 3 0.50 sudot v0.4s, v0.16b, v0.4b[2]
+# CHECK-NEXT: 1 2 0.50 suqadd b19, b14
+# CHECK-NEXT: 1 2 0.50 suqadd d18, d22
+# CHECK-NEXT: 1 2 0.50 suqadd h20, h15
+# CHECK-NEXT: 1 2 0.50 suqadd s21, s12
+# CHECK-NEXT: 1 2 0.50 suqadd v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 suqadd v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.50 suqadd v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.50 suqadd v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.50 suqadd v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 suqadd v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.50 suqadd v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.50 tbl v0.16b, { v0.16b }, v0.16b
+# CHECK-NEXT: 1 2 0.50 tbl v0.16b, { v0.16b, v1.16b }, v0.16b
+# CHECK-NEXT: 2 4 1.00 tbl v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b
+# CHECK-NEXT: 3 4 1.50 tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b
+# CHECK-NEXT: 1 2 0.50 tbl v0.8b, { v0.16b }, v0.8b
+# CHECK-NEXT: 1 2 0.50 tbl v0.8b, { v0.16b, v1.16b }, v0.8b
+# CHECK-NEXT: 2 4 1.00 tbl v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b
+# CHECK-NEXT: 3 4 1.50 tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b
+# CHECK-NEXT: 1 2 0.50 tbx v0.16b, { v0.16b }, v0.16b
+# CHECK-NEXT: 2 4 1.00 tbx v0.16b, { v0.16b, v1.16b }, v0.16b
+# CHECK-NEXT: 3 6 1.50 tbx v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b
+# CHECK-NEXT: 5 6 2.50 tbx v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b
+# CHECK-NEXT: 1 2 0.50 tbx v0.8b, { v0.16b }, v0.8b
+# CHECK-NEXT: 2 4 1.00 tbx v0.8b, { v0.16b, v1.16b }, v0.8b
+# CHECK-NEXT: 3 6 1.50 tbx v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b
+# CHECK-NEXT: 5 6 2.50 tbx v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b
+# CHECK-NEXT: 1 2 0.50 trn1 v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 trn1 v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.50 trn1 v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.50 trn1 v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.50 trn1 v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 trn1 v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.50 trn1 v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.50 trn2 v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 trn2 v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.50 trn2 v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.50 trn2 v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.50 trn2 v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 trn2 v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.50 trn2 v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: 1 4 0.50 uaba v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 4 0.50 uabal v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: 1 4 0.50 uabal v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: 1 4 0.50 uabal v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: 1 4 0.50 uabal2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: 1 4 0.50 uabal2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: 1 4 0.50 uabal2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 uabd v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.50 uabdl v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.50 uabdl v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.50 uabdl v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.50 uabdl2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 uabdl2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.50 uabdl2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: 1 4 0.50 uadalp v0.1d, v0.2s
+# CHECK-NEXT: 1 4 0.50 uadalp v0.2d, v0.4s
+# CHECK-NEXT: 1 4 0.50 uadalp v0.2s, v0.4h
+# CHECK-NEXT: 1 4 0.50 uadalp v0.4h, v0.8b
+# CHECK-NEXT: 1 4 0.50 uadalp v0.4s, v0.8h
+# CHECK-NEXT: 1 4 0.50 uadalp v0.8h, v0.16b
+# CHECK-NEXT: 1 2 0.50 uaddl v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.50 uaddl v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.50 uaddl v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.50 uaddl2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 uaddl2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.50 uaddl2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 uaddlp v0.1d, v0.2s
+# CHECK-NEXT: 1 2 0.50 uaddlp v0.2d, v0.4s
+# CHECK-NEXT: 1 2 0.50 uaddlp v0.2s, v0.4h
+# CHECK-NEXT: 1 2 0.50 uaddlp v0.4h, v0.8b
+# CHECK-NEXT: 1 2 0.50 uaddlp v0.4s, v0.8h
+# CHECK-NEXT: 1 2 0.50 uaddlp v0.8h, v0.16b
+# CHECK-NEXT: 1 3 1.00 uaddlv d0, v0.4s
+# CHECK-NEXT: 1 3 1.00 uaddlv s0, v0.4h
+# CHECK-NEXT: 2 5 1.00 uaddlv s0, v0.8h
+# CHECK-NEXT: 2 5 1.00 uaddlv h0, v0.8b
+# CHECK-NEXT: 2 6 2.00 uaddlv h0, v0.16b
+# CHECK-NEXT: 1 2 0.50 uaddw v0.2d, v0.2d, v0.2s
+# CHECK-NEXT: 1 2 0.50 uaddw v0.4s, v0.4s, v0.4h
+# CHECK-NEXT: 1 2 0.50 uaddw v0.8h, v0.8h, v0.8b
+# CHECK-NEXT: 1 2 0.50 uaddw2 v0.2d, v0.2d, v0.4s
+# CHECK-NEXT: 1 2 0.50 uaddw2 v0.4s, v0.4s, v0.8h
+# CHECK-NEXT: 1 2 0.50 uaddw2 v0.8h, v0.8h, v0.16b
+# CHECK-NEXT: 1 3 1.00 ucvtf d21, d14
+# CHECK-NEXT: 1 3 1.00 ucvtf d21, d14, #64
+# CHECK-NEXT: 2 4 2.00 ucvtf s22, s13
+# CHECK-NEXT: 2 4 2.00 ucvtf s22, s13, #32
+# CHECK-NEXT: 1 3 1.00 ucvtf v0.2d, v0.2d
+# CHECK-NEXT: 1 3 1.00 ucvtf v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 3 1.00 ucvtf v0.2s, v0.2s
+# CHECK-NEXT: 1 3 1.00 ucvtf v0.2s, v0.2s, #3
+# CHECK-NEXT: 2 4 2.00 ucvtf v0.4h, v0.4h
+# CHECK-NEXT: 2 4 2.00 ucvtf v0.4s, v0.4s
+# CHECK-NEXT: 2 4 2.00 ucvtf v0.4s, v0.4s, #3
+# CHECK-NEXT: 4 6 4.00 ucvtf v0.8h, v0.8h
+# CHECK-NEXT: 1 3 0.50 udot v0.2s, v0.8b, v0.4b[2]
+# CHECK-NEXT: 1 3 0.50 udot v0.2s, v0.8b, v0.8b
+# CHECK-NEXT: 1 3 0.50 udot v0.4s, v0.16b, v0.16b
+# CHECK-NEXT: 1 3 0.50 udot v0.4s, v0.16b, v0.4b[2]
+# CHECK-NEXT: 1 2 0.50 uhadd v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 uhadd v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.50 uhsub v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 umax v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 umax v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 umax v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.50 umaxp v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 umaxp v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 umaxp v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: 2 5 1.00 umaxv b0, v0.8b
+# CHECK-NEXT: 2 6 2.00 umaxv b0, v0.16b
+# CHECK-NEXT: 1 3 1.00 umaxv h0, v0.4h
+# CHECK-NEXT: 2 5 1.00 umaxv h0, v0.8h
+# CHECK-NEXT: 1 3 1.00 umaxv s0, v0.4s
+# CHECK-NEXT: 1 2 0.50 umin v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.50 umin v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.50 umin v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.50 uminp v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.50 uminp v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.50 uminp v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 2 5 1.00 uminv b0, v0.8b
+# CHECK-NEXT: 2 6 2.00 uminv b0, v0.16b
+# CHECK-NEXT: 1 3 1.00 uminv h0, v0.4h
+# CHECK-NEXT: 2 5 1.00 uminv h0, v0.8h
+# CHECK-NEXT: 1 3 1.00 uminv s0, v0.4s
+# CHECK-NEXT: 1 4 1.00 umlal v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: 1 4 1.00 umlal v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: 1 4 1.00 umlal v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: 1 4 1.00 umlal2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: 1 4 1.00 umlal2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: 1 4 1.00 umlal2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: 1 4 1.00 umlsl v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: 1 4 1.00 umlsl v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: 1 4 1.00 umlsl v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: 1 4 1.00 umlsl2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: 1 4 1.00 umlsl2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: 1 4 1.00 umlsl2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: 1 3 0.50 ummla v0.4s, v0.16b, v0.16b
+# CHECK-NEXT: 2 2 1.00 umov w0, v0.b[1]
+# CHECK-NEXT: 2 2 1.00 umov w0, v0.h[1]
+# CHECK-NEXT: 2 2 1.00 mov w0, v0.s[1]
+# CHECK-NEXT: 2 2 1.00 mov x0, v0.d[1]
+# CHECK-NEXT: 1 3 1.00 umull v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: 1 3 1.00 umull v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: 1 3 1.00 umull v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: 1 3 1.00 umull2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: 1 3 1.00 umull2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: 1 3 1.00 umull2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 uqadd h0, h1, h5
+# CHECK-NEXT: 1 2 0.50 uqadd v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: 1 4 0.50 uqrshl b11, b20, b30
+# CHECK-NEXT: 1 4 0.50 uqrshl s23, s20, s16
+# CHECK-NEXT: 1 4 0.50 uqrshl v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 4 0.50 uqrshl v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 4 0.50 uqrshl v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 4 0.50 uqrshl v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: 1 4 0.50 uqrshrn b10, h12, #5
+# CHECK-NEXT: 1 4 0.50 uqrshrn h12, s10, #14
+# CHECK-NEXT: 1 4 0.50 uqrshrn s10, d10, #25
+# CHECK-NEXT: 1 4 0.50 uqrshrn v0.2s, v0.2d, #3
+# CHECK-NEXT: 1 4 0.50 uqrshrn v0.4h, v0.4s, #3
+# CHECK-NEXT: 1 4 0.50 uqrshrn v0.8b, v0.8h, #3
+# CHECK-NEXT: 1 4 0.50 uqrshrn2 v0.16b, v0.8h, #3
+# CHECK-NEXT: 1 4 0.50 uqrshrn2 v0.4s, v0.2d, #3
+# CHECK-NEXT: 1 4 0.50 uqrshrn2 v0.8h, v0.4s, #3
+# CHECK-NEXT: 1 4 0.50 uqshl b11, b20, b30
+# CHECK-NEXT: 1 4 0.50 uqshl b18, b15, #6
+# CHECK-NEXT: 1 4 0.50 uqshl d15, d12, #19
+# CHECK-NEXT: 1 4 0.50 uqshl h11, h18, #7
+# CHECK-NEXT: 1 4 0.50 uqshl s14, s19, #18
+# CHECK-NEXT: 1 4 0.50 uqshl s23, s20, s16
+# CHECK-NEXT: 1 4 0.50 uqshl v0.16b, v0.16b, #3
+# CHECK-NEXT: 1 4 0.50 uqshl v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 4 0.50 uqshl v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 4 0.50 uqshl v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 4 0.50 uqshl v0.2s, v0.2s, #3
+# CHECK-NEXT: 1 4 0.50 uqshl v0.4h, v0.4h, #3
+# CHECK-NEXT: 1 4 0.50 uqshl v0.4s, v0.4s, #3
+# CHECK-NEXT: 1 4 0.50 uqshl v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 4 0.50 uqshl v0.8b, v0.8b, #3
+# CHECK-NEXT: 1 4 0.50 uqshl v0.8h, v0.8h, #3
+# CHECK-NEXT: 1 4 0.50 uqshl v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: 1 4 0.50 uqshrn b12, h10, #7
+# CHECK-NEXT: 1 4 0.50 uqshrn h10, s14, #5
+# CHECK-NEXT: 1 4 0.50 uqshrn s10, d12, #13
+# CHECK-NEXT: 1 4 0.50 uqshrn v0.2s, v0.2d, #3
+# CHECK-NEXT: 1 4 0.50 uqshrn v0.4h, v0.4s, #3
+# CHECK-NEXT: 1 4 0.50 uqshrn v0.8b, v0.8h, #3
+# CHECK-NEXT: 1 4 0.50 uqshrn2 v0.16b, v0.8h, #3
+# CHECK-NEXT: 1 4 0.50 uqshrn2 v0.4s, v0.2d, #3
+# CHECK-NEXT: 1 4 0.50 uqshrn2 v0.8h, v0.4s, #3
+# CHECK-NEXT: 1 2 0.50 uqsub d16, d16, d16
+# CHECK-NEXT: 1 2 0.50 uqsub v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 4 0.50 uqxtn b18, h18
+# CHECK-NEXT: 1 4 0.50 uqxtn h20, s17
+# CHECK-NEXT: 1 4 0.50 uqxtn s19, d14
+# CHECK-NEXT: 1 4 0.50 uqxtn v0.2s, v0.2d
+# CHECK-NEXT: 1 4 0.50 uqxtn v0.4h, v0.4s
+# CHECK-NEXT: 1 4 0.50 uqxtn v0.8b, v0.8h
+# CHECK-NEXT: 1 4 0.50 uqxtn2 v0.16b, v0.8h
+# CHECK-NEXT: 1 4 0.50 uqxtn2 v0.4s, v0.2d
+# CHECK-NEXT: 1 4 0.50 uqxtn2 v0.8h, v0.4s
+# CHECK-NEXT: 1 3 1.00 urecpe v0.2s, v0.2s
+# CHECK-NEXT: 2 4 2.00 urecpe v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 urhadd v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 urhadd v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 urhadd v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: 1 4 0.50 urshl d8, d7, d4
+# CHECK-NEXT: 1 4 0.50 urshl v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 4 0.50 urshl v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 4 0.50 urshl v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 4 0.50 urshl v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: 1 4 0.50 urshr d20, d23, #31
+# CHECK-NEXT: 1 4 0.50 urshr v0.16b, v0.16b, #3
+# CHECK-NEXT: 1 4 0.50 urshr v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 4 0.50 urshr v0.2s, v0.2s, #3
+# CHECK-NEXT: 1 4 0.50 urshr v0.4h, v0.4h, #3
+# CHECK-NEXT: 1 4 0.50 urshr v0.4s, v0.4s, #3
+# CHECK-NEXT: 1 4 0.50 urshr v0.8b, v0.8b, #3
+# CHECK-NEXT: 1 4 0.50 urshr v0.8h, v0.8h, #3
+# CHECK-NEXT: 1 3 1.00 ursqrte v0.2s, v0.2s
+# CHECK-NEXT: 2 4 2.00 ursqrte v0.4s, v0.4s
+# CHECK-NEXT: 1 4 0.50 ursra d18, d10, #13
+# CHECK-NEXT: 1 4 0.50 ursra v0.16b, v0.16b, #3
+# CHECK-NEXT: 1 4 0.50 ursra v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 4 0.50 ursra v0.2s, v0.2s, #3
+# CHECK-NEXT: 1 4 0.50 ursra v0.4h, v0.4h, #3
+# CHECK-NEXT: 1 4 0.50 ursra v0.4s, v0.4s, #3
+# CHECK-NEXT: 1 4 0.50 ursra v0.8b, v0.8b, #3
+# CHECK-NEXT: 1 4 0.50 ursra v0.8h, v0.8h, #3
+# CHECK-NEXT: 1 3 0.50 usdot v0.2s, v0.8b, v0.4b[2]
+# CHECK-NEXT: 1 3 0.50 usdot v0.2s, v0.8b, v0.8b
+# CHECK-NEXT: 1 3 0.50 usdot v0.4s, v0.16b, v0.16b
+# CHECK-NEXT: 1 3 0.50 usdot v0.4s, v0.16b, v0.4b[2]
+# CHECK-NEXT: 1 2 0.50 ushl d0, d0, d0
+# CHECK-NEXT: 1 2 0.50 ushl v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 ushl v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 ushl v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.50 ushll v0.4s, v0.4h, #3
+# CHECK-NEXT: 1 2 0.50 ushll2 v0.8h, v0.16b, #3
+# CHECK-NEXT: 1 2 0.50 ushr d10, d17, #18
+# CHECK-NEXT: 1 2 0.50 ushr v0.16b, v0.16b, #3
+# CHECK-NEXT: 1 2 0.50 ushr v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 2 0.50 ushr v0.2s, v0.2s, #3
+# CHECK-NEXT: 1 2 0.50 ushr v0.4h, v0.4h, #3
+# CHECK-NEXT: 1 2 0.50 ushr v0.4s, v0.4s, #3
+# CHECK-NEXT: 1 2 0.50 ushr v0.8b, v0.8b, #3
+# CHECK-NEXT: 1 2 0.50 ushr v0.8h, v0.8h, #3
+# CHECK-NEXT: 1 3 0.50 usmmla v0.4s, v0.16b, v0.16b
+# CHECK-NEXT: 2 2 1.00 smov w0, v0.b[1]
+# CHECK-NEXT: 2 2 1.00 smov w0, v0.h[1]
+# CHECK-NEXT: 2 2 1.00 smov x0, v0.b[1]
+# CHECK-NEXT: 2 2 1.00 smov x0, v0.h[1]
+# CHECK-NEXT: 2 2 1.00 smov x0, v0.s[1]
+# CHECK-NEXT: 1 2 0.50 usqadd b19, b14
+# CHECK-NEXT: 1 2 0.50 usqadd d18, d22
+# CHECK-NEXT: 1 2 0.50 usqadd h20, h15
+# CHECK-NEXT: 1 2 0.50 usqadd s21, s12
+# CHECK-NEXT: 1 2 0.50 usqadd v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 usqadd v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.50 usqadd v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.50 usqadd v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.50 usqadd v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 usqadd v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.50 usqadd v0.8h, v0.8h
+# CHECK-NEXT: 1 4 0.50 usra d20, d13, #61
+# CHECK-NEXT: 1 4 0.50 usra v0.16b, v0.16b, #3
+# CHECK-NEXT: 1 4 0.50 usra v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 4 0.50 usra v0.2s, v0.2s, #3
+# CHECK-NEXT: 1 4 0.50 usra v0.4h, v0.4h, #3
+# CHECK-NEXT: 1 4 0.50 usra v0.4s, v0.4s, #3
+# CHECK-NEXT: 1 4 0.50 usra v0.8b, v0.8b, #3
+# CHECK-NEXT: 1 4 0.50 usra v0.8h, v0.8h, #3
+# CHECK-NEXT: 1 2 0.50 usubl v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.50 usubl v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.50 usubl v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.50 usubl2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 usubl2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.50 usubl2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 usubw v0.2d, v0.2d, v0.2s
+# CHECK-NEXT: 1 2 0.50 usubw v0.4s, v0.4s, v0.4h
+# CHECK-NEXT: 1 2 0.50 usubw v0.8h, v0.8h, v0.8b
+# CHECK-NEXT: 1 2 0.50 usubw2 v0.2d, v0.2d, v0.4s
+# CHECK-NEXT: 1 2 0.50 usubw2 v0.4s, v0.4s, v0.8h
+# CHECK-NEXT: 1 2 0.50 usubw2 v0.8h, v0.8h, v0.16b
+# CHECK-NEXT: 1 2 0.50 uzp1 v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 uzp1 v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.50 uzp1 v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.50 uzp1 v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.50 uzp1 v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 uzp1 v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.50 uzp1 v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.50 uzp2 v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 uzp2 v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.50 uzp2 v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.50 uzp2 v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.50 uzp2 v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 uzp2 v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.50 uzp2 v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.50 xtn v0.2s, v0.2d
+# CHECK-NEXT: 1 2 0.50 xtn v0.4h, v0.4s
+# CHECK-NEXT: 1 2 0.50 xtn v0.8b, v0.8h
+# CHECK-NEXT: 1 2 0.50 xtn2 v0.16b, v0.8h
+# CHECK-NEXT: 1 2 0.50 xtn2 v0.4s, v0.2d
+# CHECK-NEXT: 1 2 0.50 xtn2 v0.8h, v0.4s
+# CHECK-NEXT: 1 2 0.50 zip1 v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 zip1 v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.50 zip1 v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.50 zip1 v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.50 zip1 v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 zip1 v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.50 zip1 v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: 1 2 0.50 zip2 v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: 1 2 0.50 zip2 v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: 1 2 0.50 zip2 v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: 1 2 0.50 zip2 v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: 1 2 0.50 zip2 v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: 1 2 0.50 zip2 v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: 1 2 0.50 zip2 v0.8h, v0.8h, v0.8h
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - V3AEUnitB
+# CHECK-NEXT: [0.1] - V3AEUnitB
+# CHECK-NEXT: [0.2] - V3AEUnitB
+# CHECK-NEXT: [1.0] - V3AEUnitD
+# CHECK-NEXT: [1.1] - V3AEUnitD
+# CHECK-NEXT: [2.0] - V3AEUnitFlg
+# CHECK-NEXT: [2.1] - V3AEUnitFlg
+# CHECK-NEXT: [2.2] - V3AEUnitFlg
+# CHECK-NEXT: [2.3] - V3AEUnitFlg
+# CHECK-NEXT: [3.0] - V3AEUnitL12
+# CHECK-NEXT: [3.1] - V3AEUnitL12
+# CHECK-NEXT: [4] - V3AEUnitLS0
+# CHECK-NEXT: [5] - V3AEUnitM0
+# CHECK-NEXT: [6] - V3AEUnitM1
+# CHECK-NEXT: [7] - V3AEUnitS0
+# CHECK-NEXT: [8] - V3AEUnitS1
+# CHECK-NEXT: [9] - V3AEUnitS2
+# CHECK-NEXT: [10] - V3AEUnitS3
+# CHECK-NEXT: [11] - V3AEUnitS4
+# CHECK-NEXT: [12] - V3AEUnitS5
+# CHECK-NEXT: [13] - V3AEUnitST1
+# CHECK-NEXT: [14] - V3AEUnitV0
+# CHECK-NEXT: [15] - V3AEUnitV1
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15]
+# CHECK-NEXT: - - - - - - - - - 27.33 27.33 48.83 14.88 3.88 3.88 3.88 3.88 3.88 3.88 3.88 21.50 1003.00 595.00
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] Instructions:
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 abs d29, d24
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 abs v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 abs v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 abs v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 abs v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 abs v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 abs v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 abs v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add d17, d31, d29
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 addhn v0.2s, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 addhn v0.4h, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 addhn v0.8b, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 addhn2 v0.16b, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 addhn2 v0.4s, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 addhn2 v0.8h, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 addp v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 addp v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 addv s0, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 addv h0, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 1.50 addv h0, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 1.50 addv b0, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 addv b0, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 aesd v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 aese v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 aesimc v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 aesmc v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 and v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - bfcvt h0, s0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - bfcvtn v0.4h, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - bfcvtn2 v0.8h, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 bfdot v0.2s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 bfdot v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 bfmlalb v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 bfmlalb v0.4s, v0.8h, v0.h[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 bfmlalt v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 bfmlalt v0.4s, v0.8h, v0.h[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 bfmmla v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 bic v0.4h, #15, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 bic v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 bif v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 bit v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 bsl v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cls v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cls v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cls v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cls v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cls v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cls v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 clz v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 clz v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 clz v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 clz v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 clz v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 clz v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cmeq d20, d21, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cmeq d20, d21, d22
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cmeq v0.16b, v0.16b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cmeq v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cmge d20, d21, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cmge d20, d21, d22
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cmge v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cmge v0.8b, v0.8b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cmgt d20, d21, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cmgt d20, d21, d22
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cmgt v0.2s, v0.2s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cmgt v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cmhi d20, d21, d22
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cmhi v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cmhs d20, d21, d22
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cmhs v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cmle d20, d21, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cmle v0.2d, v0.2d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cmlt d20, d21, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cmlt v0.8h, v0.8h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cmtst d20, d21, d22
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cmtst v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cnt v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cnt v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - dup v0.16b, w28
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - dup v0.2d, x28
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - dup v0.2s, w28
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - dup v0.4h, w28
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - dup v0.4s, w28
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - dup v0.8b, w28
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - dup v0.8h, w28
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov b0, v0.b[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov d0, v0.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov h0, v0.h[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov s0, v0.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 dup v0.16b, v0.b[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 dup v0.2d, v0.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 dup v0.2s, v0.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 dup v0.4h, v0.h[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 dup v0.4s, v0.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 dup v0.8b, v0.b[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 dup v0.8h, v0.h[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 eor v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ext v0.16b, v0.16b, v0.16b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ext v0.8b, v0.8b, v0.8b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fabd d29, d24, d20
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fabd s29, s24, s20
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fabd v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fabs v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fabs v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fabs v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fabs v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fabs v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 facge d20, d21, d22
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 facge s10, s11, s12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 facge v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 facgt d20, d21, d22
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 facgt s10, s11, s12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 facgt v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fadd v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 faddp v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 faddp v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcadd v0.2s, v0.2s, v0.2s, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcadd v0.4s, v0.4s, v0.4s, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcmeq d20, d21, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcmeq d20, d21, d22
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcmeq s10, s11, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcmeq s10, s11, s12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcmeq v0.2s, v0.2s, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcmeq v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcmge d20, d21, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcmge d20, d21, d22
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcmge s10, s11, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcmge s10, s11, s12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcmge v0.2d, v0.2d, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcmge v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcmgt d20, d21, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcmgt d20, d21, d22
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcmgt s10, s11, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcmgt s10, s11, s12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcmgt v0.4s, v0.4s, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcmgt v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcmla v0.2s, v0.2s, v0.2s, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcmla v0.4s, v0.4s, v0.s[1], #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcmle d20, d21, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcmle s10, s11, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcmle v0.2d, v0.2d, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcmlt d20, d21, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcmlt s10, s11, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcmlt v0.4s, v0.4s, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtas d21, d14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtas s12, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - fcvtas h12, h13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtas v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtas v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtas v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtas v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - fcvtas v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtau d21, d14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtau s12, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - fcvtau h12, h13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtau v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtau v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtau v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtau v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - fcvtau v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtl v0.2d, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtl v0.4s, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtl2 v0.2d, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtl2 v0.4s, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtms d21, d14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtms s22, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - fcvtms h22, h13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtms v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtms v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtms v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtms v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - fcvtms v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtmu d21, d14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtmu s12, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - fcvtmu h12, h13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtmu v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtmu v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtmu v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtmu v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - fcvtmu v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtn v0.2s, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtn v0.4h, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtn2 v0.4s, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtn2 v0.8h, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtns d21, d14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtns s22, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - fcvtns h22, h13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtns v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtns v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtns v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtns v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - fcvtns v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtnu d21, d14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtnu s12, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - fcvtnu h12, h13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtnu v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtnu v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtnu v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtnu v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - fcvtnu v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtps d21, d14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtps s22, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - fcvtps h22, h13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtps v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtps v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtps v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtps v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - fcvtps v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtpu d21, d14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtpu s12, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - fcvtpu h12, h13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtpu v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtpu v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtpu v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtpu v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - fcvtpu v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcvtxn s22, d13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtxn v0.2s, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtxn2 v0.4s, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzs d21, d12, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzs d21, d14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtzs s12, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtzs s21, s12, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - fcvtzs h21, h14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - fcvtzs h21, h12, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzs v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzs v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzs v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzs v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtzs v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtzs v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtzs v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - fcvtzs v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzu d21, d12, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzu d21, d14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtzu s12, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtzu s21, s12, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - fcvtzu h12, h13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - fcvtzu h21, h12, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzu v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzu v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzu v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzu v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtzu v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtzu v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtzu v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - fcvtzu v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 fdiv v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 fdiv v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 4.00 fdiv v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 4.00 fdiv v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 8.00 fdiv v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmax v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmax v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmax v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmaxnm v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmaxnm v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmaxnm v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmaxnmp v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmaxnmp v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmaxnmp v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmaxp v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmaxp v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmaxp v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 fmaxv h0, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.50 1.50 fmaxv h0, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 fmaxv s0, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmin v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmin v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmin v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fminnm v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fminnm v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fminnm v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fminnmp v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fminnmp v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fminnmp v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fminp v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fminp v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fminp v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmla d0, d1, v0.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmla s0, s1, v0.s[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmla v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmlal v0.2s, v0.2h, v0.h[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmlal v0.4s, v0.4h, v0.h[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmlal v0.2s, v0.2h, v0.2h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmlal v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmlal2 v0.2s, v0.2h, v0.h[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmlal2 v0.4s, v0.4h, v0.h[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmlal2 v0.2s, v0.2h, v0.2h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmlal2 v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmls d0, d4, v0.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmls s3, s5, v0.s[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmls v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmlsl v0.2s, v0.2h, v0.h[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmlsl v0.4s, v0.4h, v0.h[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmlsl v0.2s, v0.2h, v0.2h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmlsl v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmlsl2 v0.2s, v0.2h, v0.h[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmlsl2 v0.4s, v0.4h, v0.h[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmlsl2 v0.2s, v0.2h, v0.2h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmlsl2 v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmov v0.2d, #-1.25000000
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmov v0.2s, #13.00000000
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmov v0.4s, #1.00000000
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmul d0, d1, v0.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmul s0, s1, v0.s[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmul v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmulx d0, d4, v0.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmulx d23, d11, d1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmulx s20, s22, s15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmulx s3, s5, v0.s[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmulx v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmulx v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmulx v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fneg v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fneg v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fneg v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fneg v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fneg v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frecpe d13, d13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frecpe s19, s14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 frecpe v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frecpe v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - frecpe v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - frecpe v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - frecpe v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 frecps v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 frecps d22, d30, d21
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 frecps s21, s16, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frecpx d16, d19
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frecpx s18, s10
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frint32x v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frint32x v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - frint32x v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frint32z v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frint32z v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - frint32z v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frint64x v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frint64x v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - frint64x v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frint64z v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frint64z v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - frint64z v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frinta v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frinta v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - frinta v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - frinta v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - frinta v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frinti v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frinti v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - frinti v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - frinti v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - frinti v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frintm v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frintm v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - frintm v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - frintm v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - frintm v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frintn v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frintn v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - frintn v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - frintn v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - frintn v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frintp v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frintp v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - frintp v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - frintp v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - frintp v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frintx v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frintx v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - frintx v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - frintx v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - frintx v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frintz v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frintz v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - frintz v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - frintz v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - frintz v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frsqrte d21, d12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frsqrte s22, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 frsqrte v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frsqrte v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - frsqrte v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - frsqrte v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - frsqrte v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 frsqrts d8, d22, d18
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 frsqrts s21, s5, s12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 frsqrts v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 fsqrt v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 fsqrt v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 4.00 fsqrt v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 4.00 fsqrt v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 8.00 fsqrt v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fsub v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1 { v0.16b }, [x0]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ld1 { v0.2d, v1.2d, v2.2d }, [x0], #48
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 - - - - - - - - - - - ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ld1 { v0.4s, v1.4s }, [sp], #32
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 - - - - - - - - - - - ld1 { v0.4s, v1.4s, v2.4s }, [sp]
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ld1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ld1 { v0.8h }, [x15], x2
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - - - ld1 { v0.8h, v1.8h }, [x15]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 ld1 { v0.b }[9], [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - 0.50 0.50 ld1 { v0.b }[9], [x0], #1
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 ld1r { v0.16b }, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - 0.50 0.50 ld1r { v0.16b }, [x0], #1
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 0.50 0.50 ld1r { v0.8h }, [x15]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - 0.50 0.50 ld1r { v0.8h }, [x15], #2
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - 1.00 1.00 ld2 { v0.16b, v1.16b }, [x0], x1
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ld2 { v0.8b, v1.8b }, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ld2 { v0.h, v1.h }[7], [x15]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - 1.00 1.00 ld2 { v0.h, v1.h }[7], [x15], #4
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ld2r { v0.2d, v1.2d }, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - 1.00 1.00 ld2r { v0.2d, v1.2d }, [x0], #16
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ld2r { v0.4s, v1.4s }, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - 1.00 1.00 ld2r { v0.4s, v1.4s }, [sp], #8
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.50 1.50 ld3 { v0.4h, v1.4h, v2.4h }, [x15]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - 1.50 1.50 ld3 { v0.8h, v1.8h, v2.8h }, [x15], x2
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.50 1.50 ld3 { v0.s, v1.s, v2.s }[3], [sp]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - 1.50 1.50 ld3 { v0.s, v1.s, v2.s }[3], [sp], x3
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.50 1.50 ld3r { v0.4h, v1.4h, v2.4h }, [x15]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - 1.50 1.50 ld3r { v0.4h, v1.4h, v2.4h }, [x15], #6
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.50 1.50 ld3r { v0.8b, v1.8b, v2.8b }, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - 1.50 1.50 ld3r { v0.8b, v1.8b, v2.8b }, [x0], #3
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 - - - - - - - - - 2.00 2.00 ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
+# CHECK-NEXT: - - - - - - - - - 2.00 2.00 2.00 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - 2.00 2.00 ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 - - - - - - - - - 2.00 2.00 ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - 2.00 2.00 ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0], #32
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - 2.00 2.00 ld4 { v0.h, v1.h, v2.h, v3.h }[7], [x0], x0
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 - - - - - - - - - 2.00 2.00 ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [sp]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - 2.00 2.00 ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [sp], x7
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 - - - - - - - - - 2.00 2.00 ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - 2.00 2.00 ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x30
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - mla v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - mls v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov b0, v0.b[15]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov d6, v0.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov h2, v0.h[5]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov s17, v0.s[2]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov v2.b[0], v0.b[0]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov v2.h[1], v0.h[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov v2.s[2], v0.s[2]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov v2.d[1], v0.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 0.50 mov v0.b[0], w8
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 0.50 mov v0.h[1], w8
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 0.50 mov v0.s[2], w8
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 0.50 mov v0.d[1], x8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 movi d15, #0xff00ff00ff00ff
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 movi v0.16b, #31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 movi v0.2d, #0xff0000ff0000ffff
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 movi v0.2s, #8, msl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 movi v0.4s, #255, lsl #24
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 movi v0.8b, #255
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - mul v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mvni v0.2s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mvni v0.4s, #16, msl #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 neg d29, d24
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 neg v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 neg v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 neg v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 neg v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 neg v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 neg v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 neg v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mvn v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mvn v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 orn v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 orr v0.8h, #31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 pmul v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 pmul v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 pmull v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 pmull2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 raddhn v0.2s, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 raddhn v0.4h, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 raddhn v0.8b, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 raddhn2 v0.16b, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 raddhn2 v0.4s, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 raddhn2 v0.8h, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rbit v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rbit v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rev16 v21.8b, v1.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rev16 v30.16b, v31.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rev32 v0.4h, v9.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rev32 v21.8b, v1.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rev32 v30.16b, v31.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rev32 v4.8h, v7.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rev64 v0.16b, v31.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rev64 v1.8b, v9.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rev64 v13.4h, v21.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rev64 v2.8h, v4.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rev64 v4.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rev64 v6.4s, v8.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rshrn v0.2s, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rshrn v0.4h, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rshrn v0.8b, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rshrn2 v0.16b, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rshrn2 v0.4s, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rshrn2 v0.8h, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rsubhn v0.2s, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rsubhn v0.4h, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rsubhn v0.8b, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rsubhn2 v0.16b, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rsubhn2 v0.4s, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rsubhn2 v0.8h, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 saba v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sabal v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sabal v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sabal v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sabal2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sabal2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sabal2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sabd v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sabdl v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sabdl v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sabdl v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sabdl2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sabdl2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sabdl2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sadalp v0.1d, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sadalp v0.2d, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sadalp v0.2s, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sadalp v0.4h, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sadalp v0.4s, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sadalp v0.8h, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 saddl v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 saddl v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 saddl v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 saddl2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 saddl2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 saddl2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 saddlp v0.1d, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 saddlp v0.2d, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 saddlp v0.2s, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 saddlp v0.4h, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 saddlp v0.4s, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 saddlp v0.8h, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 saddlv d0, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 saddlv s0, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 1.50 saddlv s0, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 1.50 saddlv h0, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 saddlv h0, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 saddw v0.2d, v0.2d, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 saddw v0.4s, v0.4s, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 saddw v0.8h, v0.8h, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 saddw2 v0.2d, v0.2d, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 saddw2 v0.4s, v0.4s, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 saddw2 v0.8h, v0.8h, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - scvtf d21, d12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - scvtf d21, d12, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - scvtf s22, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - scvtf s22, s13, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - scvtf v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - scvtf v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - scvtf v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - scvtf v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - scvtf v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - scvtf v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - scvtf v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - scvtf v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sdot v0.2s, v0.8b, v0.4b[2]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sdot v0.2s, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sdot v0.4s, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sdot v0.4s, v0.16b, v0.4b[2]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shadd v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shl d7, d10, #12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shl v0.16b, v0.16b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shl v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shl v0.4h, v0.4h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shl v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shll v0.2d, v0.2s, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shll v0.4s, v0.4h, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shll v0.8h, v0.8b, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shll v0.2d, v0.2s, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shll v0.4s, v0.4h, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shll v0.8h, v0.8b, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shll2 v0.2d, v0.4s, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shll2 v0.4s, v0.8h, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shll2 v0.8h, v0.16b, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shll2 v0.2d, v0.4s, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shll2 v0.4s, v0.8h, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shll2 v0.8h, v0.16b, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shrn v0.2s, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shrn v0.4h, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shrn v0.8b, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shrn2 v0.16b, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shrn2 v0.4s, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shrn2 v0.8h, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shsub v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shsub v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sli d10, d14, #12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sli v0.16b, v0.16b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sli v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sli v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sli v0.4h, v0.4h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sli v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sli v0.8b, v0.8b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sli v0.8h, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 smax v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 smax v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 smax v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 smaxp v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 smaxp v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 smaxp v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 1.50 smaxv b0, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 smaxv b0, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 smaxv h0, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 1.50 smaxv h0, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 smaxv s0, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 smin v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 smin v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 smin v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sminp v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sminp v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sminp v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 1.50 sminv b0, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 sminv b0, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 sminv h0, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 1.50 sminv h0, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 sminv s0, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smlal v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smlal v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smlal v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smlal2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smlal2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smlal2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smlsl v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smlsl v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smlsl v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smlsl2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smlsl2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smlsl2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 smmla v0.4s, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smull v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smull v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smull v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smull2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smull2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smull2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqabs b19, b14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqabs d18, d12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqabs h21, h15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqabs s20, s12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqabs v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqabs v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqabs v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqabs v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqabs v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqabs v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqabs v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqadd b20, b11, b15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqadd v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqadd v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmlal d19, s24, s12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmlal d8, s9, v0.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmlal s0, h0, v0.h[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmlal s17, h27, h12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmlal v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmlal v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmlal2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmlal2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmlsl d12, s23, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmlsl d8, s9, v0.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmlsl s0, h0, v0.h[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmlsl s14, h12, h25
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmlsl v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmlsl v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmlsl2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmlsl2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmulh h10, h11, h12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmulh h7, h15, v0.h[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmulh s15, s14, v0.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmulh s20, s21, s2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmulh v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmulh v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmull d1, s1, v0.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmull d15, s22, s12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmull s1, h1, v0.h[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmull s12, h22, h12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmull v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmull v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmull2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmull2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqneg b19, b14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqneg d18, d12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqneg h21, h15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqneg s20, s12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqneg v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqneg v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqneg v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqneg v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqneg v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqneg v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqneg v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - sqrdmlah h0, h1, v2.h[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - sqrdmlah v0.4h, v1.4h, v2.h[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - sqrdmlah v0.8h, v1.8h, v2.h[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - sqrdmlah s0, s1, v2.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - sqrdmlah v0.2s, v1.2s, v2.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - sqrdmlah v0.4s, v1.4s, v2.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - sqrdmlah h0, h1, h2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - sqrdmlah v0.4h, v1.4h, v2.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - sqrdmlah v0.8h, v1.8h, v2.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - sqrdmlah s0, s1, s2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - sqrdmlah v0.2s, v1.2s, v2.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - sqrdmlah v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - sqrdmlsh h0, h1, v2.h[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - sqrdmlsh v0.4h, v1.4h, v2.h[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - sqrdmlsh v0.8h, v1.8h, v2.h[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - sqrdmlsh s0, s1, v2.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - sqrdmlsh v0.2s, v1.2s, v2.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - sqrdmlsh v0.4s, v1.4s, v2.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - sqrdmlsh h0, h1, h2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - sqrdmlsh v0.4h, v1.4h, v2.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - sqrdmlsh v0.8h, v1.8h, v2.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - sqrdmlsh s0, s1, s2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - sqrdmlsh v0.2s, v1.2s, v2.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - sqrdmlsh v0.4s, v1.4s, v2.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqrdmulh h10, h11, h12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqrdmulh h7, h15, v0.h[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqrdmulh s15, s14, v0.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqrdmulh s20, s21, s2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqrdmulh v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqrdmulh v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshl d31, d31, d31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshl h3, h4, h15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshl v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshl v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshl v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshrn b10, h13, #2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshrn h15, s10, #6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshrn s15, d12, #9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshrn v0.2s, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshrn v0.4h, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshrn v0.8b, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshrn2 v0.16b, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshrn2 v0.4s, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshrn2 v0.8h, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshrun b17, h10, #6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshrun h10, s13, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshrun s22, d16, #31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshrun v0.2s, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshrun v0.4h, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshrun v0.8b, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshrun2 v0.16b, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshrun2 v0.4s, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshrun2 v0.8h, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshl b11, b19, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshl d15, d16, #51
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshl d31, d31, d31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshl h13, h18, #11
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshl h3, h4, h15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshl s14, s17, #22
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshl v0.16b, v0.16b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshl v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshl v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshl v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshl v0.4h, v0.4h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshl v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshl v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshl v0.8b, v0.8b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshl v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshl v0.8h, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshlu b15, b18, #6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshlu d11, d13, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshlu h19, h17, #6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshlu s16, s14, #25
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshlu v0.16b, v0.16b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshlu v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshlu v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshlu v0.4h, v0.4h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshlu v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshlu v0.8b, v0.8b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshlu v0.8h, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshrn b10, h15, #5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshrn h17, s10, #4
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshrn s18, d10, #31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshrn v0.2s, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshrn v0.4h, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshrn v0.8b, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshrn2 v0.16b, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshrn2 v0.4s, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshrn2 v0.8h, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshrun b15, h10, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshrun h20, s14, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshrun s10, d15, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshrun v0.2s, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshrun v0.4h, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshrun v0.8b, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshrun2 v0.16b, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshrun2 v0.4s, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshrun2 v0.8h, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqsub s20, s10, s7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqsub v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqsub v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqsub v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqxtn b18, h18
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqxtn h20, s17
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqxtn s19, d14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqxtn v0.2s, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqxtn v0.4h, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqxtn v0.8b, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqxtn2 v0.16b, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqxtn2 v0.4s, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqxtn2 v0.8h, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqxtun b19, h14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqxtun h21, s15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqxtun s20, d12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqxtun v0.2s, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqxtun v0.4h, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqxtun v0.8b, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqxtun2 v0.16b, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqxtun2 v0.4s, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqxtun2 v0.8h, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srhadd v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srhadd v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srhadd v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sri d10, d12, #14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sri v0.16b, v0.16b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sri v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sri v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sri v0.4h, v0.4h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sri v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sri v0.8b, v0.8b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sri v0.8h, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srshl d16, d16, d16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srshl v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srshl v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srshl v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srshr d19, d18, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srshr v0.16b, v0.16b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srshr v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srshr v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srshr v0.4h, v0.4h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srshr v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srshr v0.8b, v0.8b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srshr v0.8h, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srsra d15, d11, #19
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srsra v0.16b, v0.16b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srsra v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srsra v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srsra v0.4h, v0.4h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srsra v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srsra v0.8b, v0.8b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srsra v0.8h, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sshl d31, d31, d31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sshl v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sshl v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sshl v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sshl v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sshll v0.2d, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sshll2 v0.4s, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sshr d15, d16, #12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sshr v0.16b, v0.16b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sshr v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sshr v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sshr v0.4h, v0.4h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sshr v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sshr v0.8b, v0.8b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sshr v0.8h, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ssra d18, d12, #21
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ssra v0.16b, v0.16b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ssra v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ssra v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ssra v0.4h, v0.4h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ssra v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ssra v0.8b, v0.8b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ssra v0.8h, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ssubl v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ssubl v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ssubl v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ssubl2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ssubl2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ssubl2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ssubw v0.2d, v0.2d, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ssubw v0.4s, v0.4s, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ssubw v0.8h, v0.8h, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ssubw2 v0.2d, v0.2d, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ssubw2 v0.4s, v0.4s, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ssubw2 v0.8h, v0.8h, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st1 { v0.16b }, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 1.50 1.50 1.50 st1 { v0.2d, v1.2d, v2.2d }, [x0], #48
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - - - - - 2.00 2.00 2.00 st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 1.00 1.00 1.00 st1 { v0.4s, v1.4s }, [sp], #32
+# CHECK-NEXT: - - - - - - - - - - - 1.50 - - - - - - - - 1.50 1.50 1.50 st1 { v0.4s, v1.4s, v2.4s }, [sp]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 1.00 1.00 1.00 st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 0.50 0.50 st1 { v0.8h }, [x15], x2
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 st1 { v0.8h, v1.8h }, [x15]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 1.00 1.00 st1 { v0.d }[1], [x0]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 1.00 1.00 st1 { v0.d }[1], [x0], #8
+# CHECK-NEXT: - - - - - - - - - - - 1.00 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 1.00 2.00 2.00 st2 { v0.16b, v1.16b }, [x0], x1
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 1.00 1.00 st2 { v0.8b, v1.8b }, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 1.00 1.00 st2 { v0.s, v1.s }[3], [sp]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 1.00 1.00 st2 { v0.s, v1.s }[3], [sp], #8
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 2.00 2.00 st3 { v0.4h, v1.4h, v2.4h }, [x15]
+# CHECK-NEXT: - - - - - - - - - - - 1.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 1.50 3.00 3.00 st3 { v0.8h, v1.8h, v2.8h }, [x15], x2
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 2.00 2.00 st3 { v0.h, v1.h, v2.h }[7], [x15]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 1.00 2.00 2.00 st3 { v0.h, v1.h, v2.h }[7], [x15], #6
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 3.00 3.00 st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp]
+# CHECK-NEXT: - - - - - - - - - - - 2.00 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 2.00 6.00 6.00 st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 1.50 1.50 st4 { v0.b, v1.b, v2.b, v3.b }[9], [x0]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 1.00 2.00 2.00 st4 { v0.d, v1.d, v2.d, v3.d }[1], [x0], x5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub d15, d5, d16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sudot v0.2s, v0.8b, v0.4b[2]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sudot v0.4s, v0.16b, v0.4b[2]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 suqadd b19, b14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 suqadd d18, d22
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 suqadd h20, h15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 suqadd s21, s12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 suqadd v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 suqadd v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 suqadd v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 suqadd v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 suqadd v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 suqadd v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 suqadd v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 tbl v0.16b, { v0.16b }, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 tbl v0.16b, { v0.16b, v1.16b }, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 tbl v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.50 1.50 tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 tbl v0.8b, { v0.16b }, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 tbl v0.8b, { v0.16b, v1.16b }, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 tbl v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.50 1.50 tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 tbx v0.16b, { v0.16b }, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 tbx v0.16b, { v0.16b, v1.16b }, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.50 1.50 tbx v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.50 2.50 tbx v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 tbx v0.8b, { v0.16b }, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 tbx v0.8b, { v0.16b, v1.16b }, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.50 1.50 tbx v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.50 2.50 tbx v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 trn1 v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 trn1 v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 trn1 v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 trn1 v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 trn1 v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 trn1 v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 trn1 v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 trn2 v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 trn2 v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 trn2 v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 trn2 v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 trn2 v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 trn2 v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 trn2 v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uaba v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uabal v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uabal v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uabal v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uabal2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uabal2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uabal2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uabd v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uabdl v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uabdl v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uabdl v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uabdl2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uabdl2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uabdl2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uadalp v0.1d, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uadalp v0.2d, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uadalp v0.2s, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uadalp v0.4h, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uadalp v0.4s, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uadalp v0.8h, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uaddl v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uaddl v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uaddl v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uaddl2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uaddl2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uaddl2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uaddlp v0.1d, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uaddlp v0.2d, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uaddlp v0.2s, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uaddlp v0.4h, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uaddlp v0.4s, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uaddlp v0.8h, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 uaddlv d0, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 uaddlv s0, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 1.50 uaddlv s0, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 1.50 uaddlv h0, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 uaddlv h0, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uaddw v0.2d, v0.2d, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uaddw v0.4s, v0.4s, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uaddw v0.8h, v0.8h, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uaddw2 v0.2d, v0.2d, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uaddw2 v0.4s, v0.4s, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uaddw2 v0.8h, v0.8h, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - ucvtf d21, d14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - ucvtf d21, d14, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - ucvtf s22, s13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - ucvtf s22, s13, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - ucvtf v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - ucvtf v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - ucvtf v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - ucvtf v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - ucvtf v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - ucvtf v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - ucvtf v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - ucvtf v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 udot v0.2s, v0.8b, v0.4b[2]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 udot v0.2s, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 udot v0.4s, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 udot v0.4s, v0.16b, v0.4b[2]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uhadd v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uhadd v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uhsub v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 umax v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 umax v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 umax v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 umaxp v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 umaxp v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 umaxp v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 1.50 umaxv b0, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 umaxv b0, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 umaxv h0, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 1.50 umaxv h0, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 umaxv s0, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 umin v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 umin v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 umin v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uminp v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uminp v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uminp v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 1.50 uminv b0, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 uminv b0, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 uminv h0, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 1.50 uminv h0, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 uminv s0, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umlal v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umlal v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umlal v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umlal2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umlal2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umlal2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umlsl v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umlsl v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umlsl v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umlsl2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umlsl2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umlsl2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ummla v0.4s, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 umov w0, v0.b[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 umov w0, v0.h[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 mov w0, v0.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 mov x0, v0.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umull v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umull v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umull v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umull2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umull2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umull2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqadd h0, h1, h5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqadd v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqrshl b11, b20, b30
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqrshl s23, s20, s16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqrshl v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqrshl v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqrshl v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqrshl v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqrshrn b10, h12, #5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqrshrn h12, s10, #14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqrshrn s10, d10, #25
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqrshrn v0.2s, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqrshrn v0.4h, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqrshrn v0.8b, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqrshrn2 v0.16b, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqrshrn2 v0.4s, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqrshrn2 v0.8h, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshl b11, b20, b30
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshl b18, b15, #6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshl d15, d12, #19
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshl h11, h18, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshl s14, s19, #18
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshl s23, s20, s16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshl v0.16b, v0.16b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshl v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshl v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshl v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshl v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshl v0.4h, v0.4h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshl v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshl v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshl v0.8b, v0.8b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshl v0.8h, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshl v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshrn b12, h10, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshrn h10, s14, #5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshrn s10, d12, #13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshrn v0.2s, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshrn v0.4h, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshrn v0.8b, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshrn2 v0.16b, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshrn2 v0.4s, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshrn2 v0.8h, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqsub d16, d16, d16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqsub v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqxtn b18, h18
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqxtn h20, s17
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqxtn s19, d14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqxtn v0.2s, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqxtn v0.4h, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqxtn v0.8b, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqxtn2 v0.16b, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqxtn2 v0.4s, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqxtn2 v0.8h, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - urecpe v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - urecpe v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 urhadd v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 urhadd v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 urhadd v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 urshl d8, d7, d4
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 urshl v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 urshl v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 urshl v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 urshl v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 urshr d20, d23, #31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 urshr v0.16b, v0.16b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 urshr v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 urshr v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 urshr v0.4h, v0.4h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 urshr v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 urshr v0.8b, v0.8b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 urshr v0.8h, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - ursqrte v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - ursqrte v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ursra d18, d10, #13
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ursra v0.16b, v0.16b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ursra v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ursra v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ursra v0.4h, v0.4h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ursra v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ursra v0.8b, v0.8b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ursra v0.8h, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usdot v0.2s, v0.8b, v0.4b[2]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usdot v0.2s, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usdot v0.4s, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usdot v0.4s, v0.16b, v0.4b[2]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ushl d0, d0, d0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ushl v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ushl v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ushl v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ushll v0.4s, v0.4h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ushll2 v0.8h, v0.16b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ushr d10, d17, #18
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ushr v0.16b, v0.16b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ushr v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ushr v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ushr v0.4h, v0.4h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ushr v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ushr v0.8b, v0.8b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ushr v0.8h, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usmmla v0.4s, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 smov w0, v0.b[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 smov w0, v0.h[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 smov x0, v0.b[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 smov x0, v0.h[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 smov x0, v0.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usqadd b19, b14
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usqadd d18, d22
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usqadd h20, h15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usqadd s21, s12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usqadd v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usqadd v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usqadd v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usqadd v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usqadd v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usqadd v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usqadd v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usra d20, d13, #61
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usra v0.16b, v0.16b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usra v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usra v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usra v0.4h, v0.4h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usra v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usra v0.8b, v0.8b, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usra v0.8h, v0.8h, #3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usubl v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usubl v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usubl v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usubl2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usubl2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usubl2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usubw v0.2d, v0.2d, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usubw v0.4s, v0.4s, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usubw v0.8h, v0.8h, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usubw2 v0.2d, v0.2d, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usubw2 v0.4s, v0.4s, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usubw2 v0.8h, v0.8h, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uzp1 v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uzp1 v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uzp1 v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uzp1 v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uzp1 v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uzp1 v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uzp1 v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uzp2 v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uzp2 v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uzp2 v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uzp2 v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uzp2 v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uzp2 v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uzp2 v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 xtn v0.2s, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 xtn v0.4h, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 xtn v0.8b, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 xtn2 v0.16b, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 xtn2 v0.4s, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 xtn2 v0.8h, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 zip1 v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 zip1 v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 zip1 v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 zip1 v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 zip1 v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 zip1 v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 zip1 v0.8h, v0.8h, v0.8h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 zip2 v0.16b, v0.16b, v0.16b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 zip2 v0.2d, v0.2d, v0.2d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 zip2 v0.2s, v0.2s, v0.2s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 zip2 v0.4h, v0.4h, v0.4h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 zip2 v0.4s, v0.4s, v0.4s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 zip2 v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 zip2 v0.8h, v0.8h, v0.8h
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-sve-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-sve-instructions.s
new file mode 100644
index 0000000..450de28
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-sve-instructions.s
@@ -0,0 +1,10287 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=neoverse-v3ae -mattr=+sve2-aes,+sve2-sha3,+sve2-sm4 -instruction-tables < %s | FileCheck %s
+
+abs z0.b, p0/m, z0.b
+abs z0.d, p0/m, z0.d
+abs z0.h, p0/m, z0.h
+abs z0.s, p0/m, z0.s
+abs z31.b, p7/m, z31.b
+abs z31.d, p7/m, z31.d
+abs z31.h, p7/m, z31.h
+abs z31.s, p7/m, z31.s
+adclb z0.d, z1.d, z31.d
+adclb z0.s, z1.s, z31.s
+adclt z0.d, z1.d, z31.d
+adclt z0.s, z1.s, z31.s
+add z0.b, p0/m, z0.b, z0.b
+add z0.b, z0.b, #0
+add z0.b, z0.b, z0.b
+add z0.d, p0/m, z0.d, z0.d
+add z0.d, z0.d, #0
+add z0.d, z0.d, #0, lsl #8
+add z0.d, z0.d, z0.d
+add z0.h, p0/m, z0.h, z0.h
+add z0.h, z0.h, #0
+add z0.h, z0.h, #0, lsl #8
+add z0.h, z0.h, z0.h
+add z0.s, p0/m, z0.s, z0.s
+add z0.s, z0.s, #0
+add z0.s, z0.s, #0, lsl #8
+add z0.s, z0.s, z0.s
+add z0.s, z1.s, z2.s
+add z21.b, p5/m, z21.b, z10.b
+add z21.b, z10.b, z21.b
+add z21.d, p5/m, z21.d, z10.d
+add z21.d, z10.d, z21.d
+add z21.h, p5/m, z21.h, z10.h
+add z21.h, z10.h, z21.h
+add z21.s, p5/m, z21.s, z10.s
+add z21.s, z10.s, z21.s
+add z23.b, p3/m, z23.b, z13.b
+add z23.b, z13.b, z8.b
+add z23.d, p3/m, z23.d, z13.d
+add z23.d, z13.d, z8.d
+add z23.h, p3/m, z23.h, z13.h
+add z23.h, z13.h, z8.h
+add z23.s, p3/m, z23.s, z13.s
+add z23.s, z13.s, z8.s
+add z31.b, p7/m, z31.b, z31.b
+add z31.b, z31.b, #255
+add z31.b, z31.b, z31.b
+add z31.d, p7/m, z31.d, z31.d
+add z31.d, z31.d, #65280
+add z31.d, z31.d, z31.d
+add z31.h, p7/m, z31.h, z31.h
+add z31.h, z31.h, #65280
+add z31.h, z31.h, z31.h
+add z31.s, p7/m, z31.s, z31.s
+add z31.s, z31.s, #65280
+add z31.s, z31.s, z31.s
+addhnb z0.b, z1.h, z31.h
+addhnb z0.h, z1.s, z31.s
+addhnb z0.s, z1.d, z31.d
+addhnt z0.b, z1.h, z31.h
+addhnt z0.h, z1.s, z31.s
+addhnt z0.s, z1.d, z31.d
+addp z0.b, p0/m, z0.b, z1.b
+addp z0.h, p0/m, z0.h, z1.h
+addp z29.s, p7/m, z29.s, z30.s
+addp z31.d, p7/m, z31.d, z30.d
+addpl sp, sp, #31
+addpl x0, x0, #-32
+addpl x21, x21, #0
+addpl x23, x8, #-1
+addvl sp, sp, #31
+addvl x0, x0, #-32
+addvl x21, x21, #0
+addvl x23, x8, #-1
+adr z0.d, [z0.d, z0.d, lsl #1]
+adr z0.d, [z0.d, z0.d, lsl #2]
+adr z0.d, [z0.d, z0.d, lsl #3]
+adr z0.d, [z0.d, z0.d, sxtw #1]
+adr z0.d, [z0.d, z0.d, sxtw #2]
+adr z0.d, [z0.d, z0.d, sxtw #3]
+adr z0.d, [z0.d, z0.d, sxtw]
+adr z0.d, [z0.d, z0.d, uxtw #1]
+adr z0.d, [z0.d, z0.d, uxtw #2]
+adr z0.d, [z0.d, z0.d, uxtw #3]
+adr z0.d, [z0.d, z0.d, uxtw]
+adr z0.d, [z0.d, z0.d]
+adr z0.s, [z0.s, z0.s, lsl #1]
+adr z0.s, [z0.s, z0.s, lsl #2]
+adr z0.s, [z0.s, z0.s, lsl #3]
+adr z0.s, [z0.s, z0.s]
+aesd z0.b, z0.b, z31.b
+aese z0.b, z0.b, z31.b
+aesimc z0.b, z0.b
+aesimc z31.b, z31.b
+aesmc z0.b, z0.b
+aesmc z31.b, z31.b
+and p0.b, p0/z, p0.b, p1.b
+and z0.d, z0.d, #0x6
+and z0.d, z0.d, #0xfffffffffffffff9
+and z0.d, z0.d, z0.d
+and z0.s, z0.s, #0x6
+and z0.s, z0.s, #0xfffffff9
+and z23.d, z13.d, z8.d
+and z23.h, z23.h, #0x6
+and z23.h, z23.h, #0xfff9
+and z31.b, p7/m, z31.b, z31.b
+and z31.d, p7/m, z31.d, z31.d
+and z31.h, p7/m, z31.h, z31.h
+and z31.s, p7/m, z31.s, z31.s
+and z5.b, z5.b, #0x6
+and z5.b, z5.b, #0xf9
+ands p0.b, p0/z, p0.b, p1.b
+andv b0, p7, z31.b
+andv d0, p7, z31.d
+andv h0, p7, z31.h
+andv s0, p7, z31.s
+asr z0.b, p0/m, z0.b, #1
+asr z0.b, p0/m, z0.b, z0.b
+asr z0.b, p0/m, z0.b, z1.d
+asr z0.b, z0.b, #1
+asr z0.b, z1.b, z2.d
+asr z0.d, p0/m, z0.d, #1
+asr z0.d, p0/m, z0.d, z0.d
+asr z0.d, z0.d, #1
+asr z0.h, p0/m, z0.h, #1
+asr z0.h, p0/m, z0.h, z0.h
+asr z0.h, p0/m, z0.h, z1.d
+asr z0.h, z0.h, #1
+asr z0.h, z1.h, z2.d
+asr z0.s, p0/m, z0.s, #1
+asr z0.s, p0/m, z0.s, z0.s
+asr z0.s, p0/m, z0.s, z1.d
+asr z0.s, z0.s, #1
+asr z0.s, z1.s, z2.d
+asr z31.b, p0/m, z31.b, #8
+asr z31.b, z31.b, #8
+asr z31.d, p0/m, z31.d, #64
+asr z31.d, z31.d, #64
+asr z31.h, p0/m, z31.h, #16
+asr z31.h, z31.h, #16
+asr z31.s, p0/m, z31.s, #32
+asr z31.s, z31.s, #32
+asrd z0.b, p0/m, z0.b, #1
+asrd z0.d, p0/m, z0.d, #1
+asrd z0.h, p0/m, z0.h, #1
+asrd z0.s, p0/m, z0.s, #1
+asrd z31.b, p0/m, z31.b, #8
+asrd z31.d, p0/m, z31.d, #64
+asrd z31.h, p0/m, z31.h, #16
+asrd z31.s, p0/m, z31.s, #32
+asrr z0.b, p0/m, z0.b, z0.b
+asrr z0.d, p0/m, z0.d, z0.d
+asrr z0.h, p0/m, z0.h, z0.h
+asrr z0.s, p0/m, z0.s, z0.s
+bcax z29.d, z29.d, z30.d, z31.d
+bdep z0.b, z1.b, z31.b
+bdep z0.d, z1.d, z31.d
+bdep z0.h, z1.h, z31.h
+bdep z0.s, z1.s, z31.s
+bext z0.b, z1.b, z31.b
+bext z0.d, z1.d, z31.d
+bext z0.h, z1.h, z31.h
+bext z0.s, z1.s, z31.s
+bfcvt z0.h, p0/m, z1.s
+bfcvtnt z0.h, p0/m, z1.s
+bfdot z0.s, z1.h, z2.h
+bfdot z0.s, z1.h, z2.h[0]
+bfdot z0.s, z1.h, z2.h[3]
+bfmlalb z0.s, z1.h, z2.h
+bfmlalb z0.s, z1.h, z2.h[0]
+bfmlalb z0.s, z1.h, z2.h[7]
+bfmlalb z10.s, z21.h, z14.h
+bfmlalb z21.s, z14.h, z3.h[2]
+bfmlalt z0.s, z1.h, z2.h
+bfmlalt z0.s, z1.h, z2.h[0]
+bfmlalt z0.s, z1.h, z2.h[7]
+bfmlalt z0.s, z1.h, z7.h[7]
+bfmlalt z14.s, z10.h, z21.h
+bfmmla z0.s, z1.h, z2.h
+bgrp z0.b, z1.b, z31.b
+bgrp z0.d, z1.d, z31.d
+bgrp z0.h, z1.h, z31.h
+bgrp z0.s, z1.s, z31.s
+bic p0.b, p0/z, p0.b, p0.b
+bic p15.b, p15/z, p15.b, p15.b
+bic z0.d, z0.d, z0.d
+bic z23.d, z13.d, z8.d
+bic z31.b, p7/m, z31.b, z31.b
+bic z31.d, p7/m, z31.d, z31.d
+bic z31.h, p7/m, z31.h, z31.h
+bic z31.s, p7/m, z31.s, z31.s
+bics p0.b, p0/z, p0.b, p0.b
+bics p15.b, p15/z, p15.b, p15.b
+brka p0.b, p15/m, p15.b
+brka p0.b, p15/z, p15.b
+brkas p0.b, p15/z, p15.b
+brkb p0.b, p15/m, p15.b
+brkb p0.b, p15/z, p15.b
+brkbs p0.b, p15/z, p15.b
+brkn p0.b, p15/z, p1.b, p0.b
+brkn p15.b, p15/z, p15.b, p15.b
+brkns p0.b, p15/z, p1.b, p0.b
+brkns p15.b, p15/z, p15.b, p15.b
+brkpa p0.b, p15/z, p1.b, p2.b
+brkpa p15.b, p15/z, p15.b, p15.b
+brkpas p0.b, p15/z, p1.b, p2.b
+brkpas p15.b, p15/z, p15.b, p15.b
+brkpb p0.b, p15/z, p1.b, p2.b
+brkpb p15.b, p15/z, p15.b, p15.b
+brkpbs p0.b, p15/z, p1.b, p2.b
+brkpbs p15.b, p15/z, p15.b, p15.b
+bsl z0.d, z0.d, z1.d, z2.d
+bsl1n z0.d, z0.d, z1.d, z2.d
+bsl2n z0.d, z0.d, z1.d, z2.d
+cadd z0.b, z0.b, z0.b, #90
+cadd z0.d, z0.d, z0.d, #90
+cadd z0.h, z0.h, z0.h, #90
+cadd z0.s, z0.s, z0.s, #90
+cadd z31.b, z31.b, z31.b, #270
+cadd z31.d, z31.d, z31.d, #270
+cadd z31.h, z31.h, z31.h, #270
+cadd z31.s, z31.s, z31.s, #270
+cdot z0.d, z1.h, z15.h[1], #0
+cdot z0.d, z1.h, z31.h, #0
+cdot z0.d, z1.h, z31.h, #180
+cdot z0.d, z1.h, z31.h, #270
+cdot z0.d, z1.h, z31.h, #90
+cdot z0.s, z1.b, z31.b, #0
+cdot z0.s, z1.b, z7.b[3], #0
+cdot z29.d, z30.h, z0.h[0], #180
+cdot z31.d, z30.h, z7.h[1], #270
+cdot z5.d, z6.h, z3.h[0], #90
+clasta b0, p7, b0, z31.b
+clasta d0, p7, d0, z31.d
+clasta h0, p7, h0, z31.h
+clasta s0, p7, s0, z31.s
+clasta w0, p7, w0, z31.b
+clasta w0, p7, w0, z31.h
+clasta w0, p7, w0, z31.s
+clasta x0, p7, x0, z31.d
+clasta z0.b, p7, z0.b, z31.b
+clasta z0.d, p7, z0.d, z31.d
+clasta z0.h, p7, z0.h, z31.h
+clasta z0.s, p7, z0.s, z31.s
+clastb b0, p7, b0, z31.b
+clastb d0, p7, d0, z31.d
+clastb h0, p7, h0, z31.h
+clastb s0, p7, s0, z31.s
+clastb w0, p7, w0, z31.b
+clastb w0, p7, w0, z31.h
+clastb w0, p7, w0, z31.s
+clastb x0, p7, x0, z31.d
+clastb z0.b, p7, z0.b, z31.b
+clastb z0.d, p7, z0.d, z31.d
+clastb z0.h, p7, z0.h, z31.h
+clastb z0.s, p7, z0.s, z31.s
+cls z31.b, p7/m, z31.b
+cls z31.d, p7/m, z31.d
+cls z31.h, p7/m, z31.h
+cls z31.s, p7/m, z31.s
+clz z31.b, p7/m, z31.b
+clz z31.d, p7/m, z31.d
+clz z31.h, p7/m, z31.h
+clz z31.s, p7/m, z31.s
+cmla z0.b, z1.b, z2.b, #0
+cmla z0.d, z1.d, z2.d, #0
+cmla z0.h, z1.h, z2.h, #0
+cmla z0.h, z1.h, z2.h[0], #0
+cmla z0.s, z1.s, z2.s, #0
+cmla z0.s, z1.s, z2.s[0], #0
+cmla z15.b, z16.b, z17.b, #270
+cmla z15.d, z16.d, z17.d, #270
+cmla z15.h, z16.h, z17.h, #270
+cmla z15.s, z16.s, z17.s, #270
+cmla z29.b, z30.b, z31.b, #90
+cmla z29.d, z30.d, z31.d, #90
+cmla z29.h, z30.h, z31.h, #90
+cmla z29.s, z30.s, z31.s, #90
+cmla z31.b, z31.b, z31.b, #180
+cmla z31.d, z31.d, z31.d, #180
+cmla z31.h, z30.h, z7.h[0], #180
+cmla z31.h, z31.h, z31.h, #180
+cmla z31.s, z30.s, z7.s[0], #180
+cmla z31.s, z31.s, z31.s, #180
+cmpeq p0.b, p0/z, z0.b, #-16
+cmpeq p0.b, p0/z, z0.b, #15
+cmpeq p0.b, p0/z, z0.b, z0.b
+cmpeq p0.b, p0/z, z0.b, z0.d
+cmpeq p0.d, p0/z, z0.d, #-16
+cmpeq p0.d, p0/z, z0.d, #15
+cmpeq p0.d, p0/z, z0.d, z0.d
+cmpeq p0.h, p0/z, z0.h, #-16
+cmpeq p0.h, p0/z, z0.h, #15
+cmpeq p0.h, p0/z, z0.h, z0.d
+cmpeq p0.h, p0/z, z0.h, z0.h
+cmpeq p0.s, p0/z, z0.s, #-16
+cmpeq p0.s, p0/z, z0.s, #15
+cmpeq p0.s, p0/z, z0.s, z0.d
+cmpeq p0.s, p0/z, z0.s, z0.s
+cmpge p0.b, p0/z, z0.b, #-16
+cmpge p0.b, p0/z, z0.b, #15
+cmpge p0.b, p0/z, z0.b, z0.b
+cmpge p0.b, p0/z, z0.b, z0.d
+cmpge p0.b, p0/z, z1.b, z0.b
+cmpge p0.d, p0/z, z0.d, #-16
+cmpge p0.d, p0/z, z0.d, #15
+cmpge p0.d, p0/z, z0.d, z0.d
+cmpge p0.d, p0/z, z1.d, z0.d
+cmpge p0.h, p0/z, z0.h, #-16
+cmpge p0.h, p0/z, z0.h, #15
+cmpge p0.h, p0/z, z0.h, z0.d
+cmpge p0.h, p0/z, z0.h, z0.h
+cmpge p0.h, p0/z, z1.h, z0.h
+cmpge p0.s, p0/z, z0.s, #-16
+cmpge p0.s, p0/z, z0.s, #15
+cmpge p0.s, p0/z, z0.s, z0.d
+cmpge p0.s, p0/z, z0.s, z0.s
+cmpge p0.s, p0/z, z1.s, z0.s
+cmpgt p0.b, p0/z, z0.b, #-16
+cmpgt p0.b, p0/z, z0.b, #15
+cmpgt p0.b, p0/z, z0.b, z0.b
+cmpgt p0.b, p0/z, z0.b, z0.d
+cmpgt p0.b, p0/z, z1.b, z0.b
+cmpgt p0.d, p0/z, z0.d, #-16
+cmpgt p0.d, p0/z, z0.d, #15
+cmpgt p0.d, p0/z, z0.d, z0.d
+cmpgt p0.d, p0/z, z1.d, z0.d
+cmpgt p0.h, p0/z, z0.h, #-16
+cmpgt p0.h, p0/z, z0.h, #15
+cmpgt p0.h, p0/z, z0.h, z0.d
+cmpgt p0.h, p0/z, z0.h, z0.h
+cmpgt p0.h, p0/z, z1.h, z0.h
+cmpgt p0.s, p0/z, z0.s, #-16
+cmpgt p0.s, p0/z, z0.s, #15
+cmpgt p0.s, p0/z, z0.s, z0.d
+cmpgt p0.s, p0/z, z0.s, z0.s
+cmpgt p0.s, p0/z, z1.s, z0.s
+cmphi p0.b, p0/z, z0.b, #0
+cmphi p0.b, p0/z, z0.b, #127
+cmphi p0.b, p0/z, z0.b, z0.b
+cmphi p0.b, p0/z, z0.b, z0.d
+cmphi p0.b, p0/z, z1.b, z0.b
+cmphi p0.d, p0/z, z0.d, #0
+cmphi p0.d, p0/z, z0.d, #127
+cmphi p0.d, p0/z, z0.d, z0.d
+cmphi p0.d, p0/z, z1.d, z0.d
+cmphi p0.h, p0/z, z0.h, #0
+cmphi p0.h, p0/z, z0.h, #127
+cmphi p0.h, p0/z, z0.h, z0.d
+cmphi p0.h, p0/z, z0.h, z0.h
+cmphi p0.h, p0/z, z1.h, z0.h
+cmphi p0.s, p0/z, z0.s, #0
+cmphi p0.s, p0/z, z0.s, #127
+cmphi p0.s, p0/z, z0.s, z0.d
+cmphi p0.s, p0/z, z0.s, z0.s
+cmphi p0.s, p0/z, z1.s, z0.s
+cmphs p0.b, p0/z, z0.b, #0
+cmphs p0.b, p0/z, z0.b, #127
+cmphs p0.b, p0/z, z0.b, z0.b
+cmphs p0.b, p0/z, z0.b, z0.d
+cmphs p0.b, p0/z, z1.b, z0.b
+cmphs p0.d, p0/z, z0.d, #0
+cmphs p0.d, p0/z, z0.d, #127
+cmphs p0.d, p0/z, z0.d, z0.d
+cmphs p0.d, p0/z, z1.d, z0.d
+cmphs p0.h, p0/z, z0.h, #0
+cmphs p0.h, p0/z, z0.h, #127
+cmphs p0.h, p0/z, z0.h, z0.d
+cmphs p0.h, p0/z, z0.h, z0.h
+cmphs p0.h, p0/z, z1.h, z0.h
+cmphs p0.s, p0/z, z0.s, #0
+cmphs p0.s, p0/z, z0.s, #127
+cmphs p0.s, p0/z, z0.s, z0.d
+cmphs p0.s, p0/z, z0.s, z0.s
+cmphs p0.s, p0/z, z1.s, z0.s
+cmple p0.b, p0/z, z0.b, #-16
+cmple p0.b, p0/z, z0.b, #15
+cmple p0.b, p0/z, z0.b, z0.d
+cmple p0.d, p0/z, z0.d, #-16
+cmple p0.d, p0/z, z0.d, #15
+cmple p0.h, p0/z, z0.h, #-16
+cmple p0.h, p0/z, z0.h, #15
+cmple p0.h, p0/z, z0.h, z0.d
+cmple p0.s, p0/z, z0.s, #-16
+cmple p0.s, p0/z, z0.s, #15
+cmple p0.s, p0/z, z0.s, z0.d
+cmplo p0.b, p0/z, z0.b, #0
+cmplo p0.b, p0/z, z0.b, #127
+cmplo p0.b, p0/z, z0.b, z0.d
+cmplo p0.d, p0/z, z0.d, #0
+cmplo p0.d, p0/z, z0.d, #127
+cmplo p0.h, p0/z, z0.h, #0
+cmplo p0.h, p0/z, z0.h, #127
+cmplo p0.h, p0/z, z0.h, z0.d
+cmplo p0.s, p0/z, z0.s, #0
+cmplo p0.s, p0/z, z0.s, #127
+cmplo p0.s, p0/z, z0.s, z0.d
+cmpls p0.b, p0/z, z0.b, #0
+cmpls p0.b, p0/z, z0.b, #127
+cmpls p0.b, p0/z, z0.b, z0.d
+cmpls p0.d, p0/z, z0.d, #0
+cmpls p0.d, p0/z, z0.d, #127
+cmpls p0.h, p0/z, z0.h, #0
+cmpls p0.h, p0/z, z0.h, #127
+cmpls p0.h, p0/z, z0.h, z0.d
+cmpls p0.s, p0/z, z0.s, #0
+cmpls p0.s, p0/z, z0.s, #127
+cmpls p0.s, p0/z, z0.s, z0.d
+cmplt p0.b, p0/z, z0.b, #-16
+cmplt p0.b, p0/z, z0.b, #15
+cmplt p0.b, p0/z, z0.b, z0.d
+cmplt p0.d, p0/z, z0.d, #-16
+cmplt p0.d, p0/z, z0.d, #15
+cmplt p0.h, p0/z, z0.h, #-16
+cmplt p0.h, p0/z, z0.h, #15
+cmplt p0.h, p0/z, z0.h, z0.d
+cmplt p0.s, p0/z, z0.s, #-16
+cmplt p0.s, p0/z, z0.s, #15
+cmplt p0.s, p0/z, z0.s, z0.d
+cmpne p0.b, p0/z, z0.b, #-16
+cmpne p0.b, p0/z, z0.b, #15
+cmpne p0.b, p0/z, z0.b, z0.b
+cmpne p0.b, p0/z, z0.b, z0.d
+cmpne p0.d, p0/z, z0.d, #-16
+cmpne p0.d, p0/z, z0.d, #15
+cmpne p0.d, p0/z, z0.d, z0.d
+cmpne p0.h, p0/z, z0.h, #-16
+cmpne p0.h, p0/z, z0.h, #15
+cmpne p0.h, p0/z, z0.h, z0.d
+cmpne p0.h, p0/z, z0.h, z0.h
+cmpne p0.s, p0/z, z0.s, #-16
+cmpne p0.s, p0/z, z0.s, #15
+cmpne p0.s, p0/z, z0.s, z0.d
+cmpne p0.s, p0/z, z0.s, z0.s
+cnot z31.b, p7/m, z31.b
+cnot z31.d, p7/m, z31.d
+cnot z31.h, p7/m, z31.h
+cnot z31.s, p7/m, z31.s
+cnt z31.b, p7/m, z31.b
+cnt z31.d, p7/m, z31.d
+cnt z31.h, p7/m, z31.h
+cnt z31.s, p7/m, z31.s
+cntb x0
+cntb x0, #28
+cntb x0, all, mul #16
+cntb x0, pow2
+cntd x0
+cntd x0, #28
+cntd x0, all, mul #16
+cntd x0, pow2
+cnth x0
+cnth x0, #28
+cnth x0, all, mul #16
+cnth x0, pow2
+cntp x0, p15, p0.b
+cntp x0, p15, p0.d
+cntp x0, p15, p0.h
+cntp x0, p15, p0.s
+cntw x0
+cntw x0, #28
+cntw x0, all, mul #16
+cntw x0, pow2
+compact z31.d, p7, z31.d
+compact z31.s, p7, z31.s
+ctermeq w30, wzr
+ctermeq wzr, w30
+ctermeq x30, xzr
+ctermeq xzr, x30
+ctermne w30, wzr
+ctermne wzr, w30
+ctermne x30, xzr
+ctermne xzr, x30
+decb x0
+decb x0, #14
+decb x0, all, mul #16
+decb x0, pow2
+decb x0, vl1
+decd x0
+decd x0, #14
+decd x0, all, mul #16
+decd x0, pow2
+decd x0, vl1
+dech x0
+dech x0, #14
+dech x0, all, mul #16
+dech x0, pow2
+dech x0, vl1
+decp x0, p0.b
+decp x0, p0.d
+decp x0, p0.h
+decp x0, p0.s
+decp xzr, p15.b
+decp xzr, p15.d
+decp xzr, p15.h
+decp xzr, p15.s
+decp z31.d, p15.d
+decp z31.h, p15.h
+decp z31.s, p15.s
+decw x0
+decw x0, #14
+decw x0, all, mul #16
+decw x0, pow2
+decw x0, vl1
+dupm z0.d, #0xfffffffffffffff9
+dupm z0.s, #0xfffffff9
+dupm z23.h, #0xfff9
+dupm z5.b, #0xf9
+eor p0.b, p0/z, p0.b, p1.b
+eor z0.d, z0.d, #0x6
+eor z0.d, z0.d, #0xfffffffffffffff9
+eor z0.d, z0.d, z0.d
+eor z0.s, z0.s, #0x6
+eor z0.s, z0.s, #0xfffffff9
+eor z23.d, z13.d, z8.d
+eor z23.h, z23.h, #0x6
+eor z23.h, z23.h, #0xfff9
+eor z31.b, p7/m, z31.b, z31.b
+eor z31.d, p7/m, z31.d, z31.d
+eor z31.h, p7/m, z31.h, z31.h
+eor z31.s, p7/m, z31.s, z31.s
+eor z5.b, z5.b, #0x6
+eor z5.b, z5.b, #0xf9
+eor3 z29.d, z29.d, z30.d, z31.d
+eorbt z0.b, z1.b, z31.b
+eorbt z0.d, z1.d, z31.d
+eorbt z0.h, z1.h, z31.h
+eorbt z0.s, z1.s, z31.s
+eors p0.b, p0/z, p0.b, p1.b
+eortb z0.b, z1.b, z31.b
+eortb z0.d, z1.d, z31.d
+eortb z0.h, z1.h, z31.h
+eortb z0.s, z1.s, z31.s
+eorv b0, p7, z31.b
+eorv d0, p7, z31.d
+eorv h0, p7, z31.h
+eorv s0, p7, z31.s
+ext z0.b, { z1.b, z2.b }, #0
+ext z31.b, z31.b, z0.b, #0
+ext z31.b, z31.b, z0.b, #255
+ext z31.b, { z30.b, z31.b }, #255
+fabd z0.d, p7/m, z0.d, z31.d
+fabd z0.h, p7/m, z0.h, z31.h
+fabd z0.s, p7/m, z0.s, z31.s
+fabs z31.d, p7/m, z31.d
+fabs z31.h, p7/m, z31.h
+fabs z31.s, p7/m, z31.s
+facge p0.d, p0/z, z0.d, z1.d
+facge p0.d, p0/z, z1.d, z0.d
+facge p0.h, p0/z, z0.h, z1.h
+facge p0.h, p0/z, z1.h, z0.h
+facge p0.s, p0/z, z0.s, z1.s
+facge p0.s, p0/z, z1.s, z0.s
+facgt p0.d, p0/z, z0.d, z1.d
+facgt p0.d, p0/z, z1.d, z0.d
+facgt p0.h, p0/z, z0.h, z1.h
+facgt p0.h, p0/z, z1.h, z0.h
+facgt p0.s, p0/z, z0.s, z1.s
+facgt p0.s, p0/z, z1.s, z0.s
+fadd z0.d, p0/m, z0.d, #0.5
+fadd z0.d, p7/m, z0.d, z31.d
+fadd z0.d, z1.d, z31.d
+fadd z0.h, p0/m, z0.h, #0.5
+fadd z0.h, p7/m, z0.h, z31.h
+fadd z0.h, z1.h, z31.h
+fadd z0.s, p0/m, z0.s, #0.5
+fadd z0.s, p7/m, z0.s, z31.s
+fadd z0.s, z1.s, z31.s
+fadd z31.d, p7/m, z31.d, #1.0
+fadd z31.h, p7/m, z31.h, #1.0
+fadd z31.s, p7/m, z31.s, #1.0
+fadda d0, p7, d0, z31.d
+fadda h0, p7, h0, z31.h
+fadda s0, p7, s0, z31.s
+faddp z0.h, p0/m, z0.h, z1.h
+faddp z29.s, p3/m, z29.s, z30.s
+faddp z31.d, p7/m, z31.d, z30.d
+faddv d0, p7, z31.d
+faddv h0, p7, z31.h
+faddv s0, p7, z31.s
+fcadd z0.d, p0/m, z0.d, z0.d, #90
+fcadd z0.h, p0/m, z0.h, z0.h, #90
+fcadd z0.s, p0/m, z0.s, z0.s, #90
+fcadd z31.d, p7/m, z31.d, z31.d, #270
+fcadd z31.h, p7/m, z31.h, z31.h, #270
+fcadd z31.s, p7/m, z31.s, z31.s, #270
+fcmeq p0.d, p0/z, z0.d, #0.0
+fcmeq p0.d, p0/z, z0.d, z1.d
+fcmeq p0.h, p0/z, z0.h, #0.0
+fcmeq p0.h, p0/z, z0.h, z1.h
+fcmeq p0.s, p0/z, z0.s, #0.0
+fcmeq p0.s, p0/z, z0.s, z1.s
+fcmge p0.d, p0/z, z0.d, #0.0
+fcmge p0.d, p0/z, z0.d, z1.d
+fcmge p0.d, p0/z, z1.d, z0.d
+fcmge p0.h, p0/z, z0.h, #0.0
+fcmge p0.h, p0/z, z0.h, z1.h
+fcmge p0.h, p0/z, z1.h, z0.h
+fcmge p0.s, p0/z, z0.s, #0.0
+fcmge p0.s, p0/z, z0.s, z1.s
+fcmge p0.s, p0/z, z1.s, z0.s
+fcmgt p0.d, p0/z, z0.d, #0.0
+fcmgt p0.d, p0/z, z0.d, z1.d
+fcmgt p0.d, p0/z, z1.d, z0.d
+fcmgt p0.h, p0/z, z0.h, #0.0
+fcmgt p0.h, p0/z, z0.h, z1.h
+fcmgt p0.h, p0/z, z1.h, z0.h
+fcmgt p0.s, p0/z, z0.s, #0.0
+fcmgt p0.s, p0/z, z0.s, z1.s
+fcmgt p0.s, p0/z, z1.s, z0.s
+fcmla z0.d, p0/m, z0.d, z0.d, #0
+fcmla z0.d, p0/m, z1.d, z2.d, #90
+fcmla z0.h, p0/m, z0.h, z0.h, #0
+fcmla z0.h, p0/m, z1.h, z2.h, #90
+fcmla z0.h, z0.h, z0.h[0], #0
+fcmla z0.s, p0/m, z0.s, z0.s, #0
+fcmla z0.s, p0/m, z1.s, z2.s, #90
+fcmla z21.s, z10.s, z5.s[1], #90
+fcmla z23.s, z13.s, z8.s[0], #270
+fcmla z29.d, p7/m, z30.d, z31.d, #180
+fcmla z29.h, p7/m, z30.h, z31.h, #180
+fcmla z29.s, p7/m, z30.s, z31.s, #180
+fcmla z31.d, p7/m, z31.d, z31.d, #270
+fcmla z31.h, p7/m, z31.h, z31.h, #270
+fcmla z31.h, z31.h, z7.h[3], #270
+fcmla z31.s, p7/m, z31.s, z31.s, #270
+fcmle p0.d, p0/z, z0.d, #0.0
+fcmle p0.h, p0/z, z0.h, #0.0
+fcmle p0.s, p0/z, z0.s, #0.0
+fcmlt p0.d, p0/z, z0.d, #0.0
+fcmlt p0.h, p0/z, z0.h, #0.0
+fcmlt p0.s, p0/z, z0.s, #0.0
+fcmne p0.d, p0/z, z0.d, #0.0
+fcmne p0.d, p0/z, z0.d, z1.d
+fcmne p0.h, p0/z, z0.h, #0.0
+fcmne p0.h, p0/z, z0.h, z1.h
+fcmne p0.s, p0/z, z0.s, #0.0
+fcmne p0.s, p0/z, z0.s, z1.s
+fcmuo p0.d, p0/z, z0.d, z1.d
+fcmuo p0.h, p0/z, z0.h, z1.h
+fcmuo p0.s, p0/z, z0.s, z1.s
+fcvt z0.d, p0/m, z0.h
+fcvt z0.d, p0/m, z0.s
+fcvt z0.h, p0/m, z0.d
+fcvt z0.h, p0/m, z0.s
+fcvt z0.s, p0/m, z0.d
+fcvt z0.s, p0/m, z0.h
+fcvtlt z0.s, p0/m, z1.h
+fcvtlt z30.d, p7/m, z31.s
+fcvtnt z0.h, p0/m, z1.s
+fcvtnt z30.s, p7/m, z31.d
+fcvtx z0.s, p0/m, z0.d
+fcvtx z30.s, p7/m, z31.d
+fcvtxnt z0.s, p0/m, z1.d
+fcvtxnt z30.s, p7/m, z31.d
+fcvtzs z0.d, p0/m, z0.d
+fcvtzs z0.d, p0/m, z0.h
+fcvtzs z0.d, p0/m, z0.s
+fcvtzs z0.h, p0/m, z0.h
+fcvtzs z0.s, p0/m, z0.d
+fcvtzs z0.s, p0/m, z0.h
+fcvtzs z0.s, p0/m, z0.s
+fcvtzu z0.d, p0/m, z0.d
+fcvtzu z0.d, p0/m, z0.h
+fcvtzu z0.d, p0/m, z0.s
+fcvtzu z0.h, p0/m, z0.h
+fcvtzu z0.s, p0/m, z0.d
+fcvtzu z0.s, p0/m, z0.h
+fcvtzu z0.s, p0/m, z0.s
+fdiv z0.d, p7/m, z0.d, z31.d
+fdiv z0.h, p7/m, z0.h, z31.h
+fdiv z0.s, p7/m, z0.s, z31.s
+fdivr z0.d, p7/m, z0.d, z31.d
+fdivr z0.h, p7/m, z0.h, z31.h
+fdivr z0.s, p7/m, z0.s, z31.s
+fexpa z0.d, z31.d
+fexpa z0.h, z31.h
+fexpa z0.s, z31.s
+flogb z31.d, p7/m, z31.d
+flogb z31.h, p7/m, z31.h
+flogb z31.s, p7/m, z31.s
+fmad z0.d, p7/m, z1.d, z31.d
+fmad z0.h, p7/m, z1.h, z31.h
+fmad z0.s, p7/m, z1.s, z31.s
+fmax z0.d, p0/m, z0.d, #0.0
+fmax z0.d, p7/m, z0.d, z31.d
+fmax z0.h, p0/m, z0.h, #0.0
+fmax z0.h, p7/m, z0.h, z31.h
+fmax z0.s, p0/m, z0.s, #0.0
+fmax z0.s, p7/m, z0.s, z31.s
+fmax z31.d, p7/m, z31.d, #1.0
+fmax z31.h, p7/m, z31.h, #1.0
+fmax z31.s, p7/m, z31.s, #1.0
+fmaxnm z0.d, p0/m, z0.d, #0.0
+fmaxnm z0.d, p7/m, z0.d, z31.d
+fmaxnm z0.h, p0/m, z0.h, #0.0
+fmaxnm z0.h, p7/m, z0.h, z31.h
+fmaxnm z0.s, p0/m, z0.s, #0.0
+fmaxnm z0.s, p7/m, z0.s, z31.s
+fmaxnm z31.d, p7/m, z31.d, #1.0
+fmaxnm z31.h, p7/m, z31.h, #1.0
+fmaxnm z31.s, p7/m, z31.s, #1.0
+fmaxnmp z0.h, p0/m, z0.h, z1.h
+fmaxnmp z29.s, p3/m, z29.s, z30.s
+fmaxnmp z31.d, p7/m, z31.d, z30.d
+fmaxnmv d0, p7, z31.d
+fmaxnmv h0, p7, z31.h
+fmaxnmv s0, p7, z31.s
+fmaxp z0.h, p0/m, z0.h, z1.h
+fmaxp z29.s, p3/m, z29.s, z30.s
+fmaxp z31.d, p7/m, z31.d, z30.d
+fmaxv d0, p7, z31.d
+fmaxv h0, p7, z31.h
+fmaxv s0, p7, z31.s
+fmin z0.d, p0/m, z0.d, #0.0
+fmin z0.d, p7/m, z0.d, z31.d
+fmin z0.h, p0/m, z0.h, #0.0
+fmin z0.h, p7/m, z0.h, z31.h
+fmin z0.s, p0/m, z0.s, #0.0
+fmin z0.s, p7/m, z0.s, z31.s
+fmin z31.d, p7/m, z31.d, #1.0
+fmin z31.h, p7/m, z31.h, #1.0
+fmin z31.s, p7/m, z31.s, #1.0
+fminnm z0.d, p0/m, z0.d, #0.0
+fminnm z0.d, p7/m, z0.d, z31.d
+fminnm z0.h, p0/m, z0.h, #0.0
+fminnm z0.h, p7/m, z0.h, z31.h
+fminnm z0.s, p0/m, z0.s, #0.0
+fminnm z0.s, p7/m, z0.s, z31.s
+fminnm z31.d, p7/m, z31.d, #1.0
+fminnm z31.h, p7/m, z31.h, #1.0
+fminnm z31.s, p7/m, z31.s, #1.0
+fminnmp z0.h, p0/m, z0.h, z1.h
+fminnmp z29.s, p3/m, z29.s, z30.s
+fminnmp z31.d, p7/m, z31.d, z30.d
+fminnmv d0, p7, z31.d
+fminnmv h0, p7, z31.h
+fminnmv s0, p7, z31.s
+fminp z0.h, p0/m, z0.h, z1.h
+fminp z29.s, p3/m, z29.s, z30.s
+fminp z31.d, p7/m, z31.d, z30.d
+fminv d0, p7, z31.d
+fminv h0, p7, z31.h
+fminv s0, p7, z31.s
+fmla z0.d, p7/m, z1.d, z31.d
+fmla z0.d, z1.d, z7.d[1]
+fmla z0.h, p7/m, z1.h, z31.h
+fmla z0.h, z1.h, z7.h[7]
+fmla z0.s, p7/m, z1.s, z31.s
+fmla z0.s, z1.s, z7.s[3]
+fmlalb z0.s, z1.h, z7.h[0]
+fmlalb z29.s, z30.h, z31.h
+fmlalb z30.s, z31.h, z7.h[7]
+fmlalt z0.s, z1.h, z7.h[0]
+fmlalt z29.s, z30.h, z31.h
+fmlalt z30.s, z31.h, z7.h[7]
+fmls z0.d, p7/m, z1.d, z31.d
+fmls z0.d, z1.d, z7.d[1]
+fmls z0.h, p7/m, z1.h, z31.h
+fmls z0.h, z1.h, z7.h[7]
+fmls z0.s, p7/m, z1.s, z31.s
+fmls z0.s, z1.s, z7.s[3]
+fmlslb z0.s, z1.h, z7.h[0]
+fmlslb z29.s, z30.h, z31.h
+fmlslb z30.s, z31.h, z7.h[7]
+fmlslt z0.s, z1.h, z7.h[0]
+fmlslt z29.s, z30.h, z31.h
+fmlslt z30.s, z31.h, z7.h[7]
+fmov z0.d, #-10.00000000
+fmov z0.d, #0.12500000
+fmov z0.d, p0/m, #-10.00000000
+fmov z0.d, p0/m, #0.12500000
+fmov z0.h, #-0.12500000
+fmov z0.h, p0/m, #-0.12500000
+fmov z0.s, #-0.12500000
+fmov z0.s, p0/m, #-0.12500000
+fmsb z0.d, p7/m, z1.d, z31.d
+fmsb z0.h, p7/m, z1.h, z31.h
+fmsb z0.s, p7/m, z1.s, z31.s
+fmul z0.d, p0/m, z0.d, #0.5
+fmul z0.d, p7/m, z0.d, z31.d
+fmul z0.d, z0.d, z0.d[0]
+fmul z0.d, z1.d, z31.d
+fmul z0.h, p0/m, z0.h, #0.5
+fmul z0.h, p7/m, z0.h, z31.h
+fmul z0.h, z0.h, z0.h[0]
+fmul z0.h, z1.h, z31.h
+fmul z0.s, p0/m, z0.s, #0.5
+fmul z0.s, p7/m, z0.s, z31.s
+fmul z0.s, z0.s, z0.s[0]
+fmul z0.s, z1.s, z31.s
+fmul z31.d, p7/m, z31.d, #2.0
+fmul z31.d, z31.d, z15.d[1]
+fmul z31.h, p7/m, z31.h, #2.0
+fmul z31.h, z31.h, z7.h[7]
+fmul z31.s, p7/m, z31.s, #2.0
+fmul z31.s, z31.s, z7.s[3]
+fmulx z0.d, p7/m, z0.d, z31.d
+fmulx z0.h, p7/m, z0.h, z31.h
+fmulx z0.s, p7/m, z0.s, z31.s
+fneg z31.d, p7/m, z31.d
+fneg z31.h, p7/m, z31.h
+fneg z31.s, p7/m, z31.s
+fnmad z0.d, p7/m, z1.d, z31.d
+fnmad z0.h, p7/m, z1.h, z31.h
+fnmad z0.s, p7/m, z1.s, z31.s
+fnmla z0.d, p7/m, z1.d, z31.d
+fnmla z0.h, p7/m, z1.h, z31.h
+fnmla z0.s, p7/m, z1.s, z31.s
+fnmls z0.d, p7/m, z1.d, z31.d
+fnmls z0.h, p7/m, z1.h, z31.h
+fnmls z0.s, p7/m, z1.s, z31.s
+fnmsb z0.d, p7/m, z1.d, z31.d
+fnmsb z0.h, p7/m, z1.h, z31.h
+fnmsb z0.s, p7/m, z1.s, z31.s
+frecpe z0.d, z31.d
+frecpe z0.h, z31.h
+frecpe z0.s, z31.s
+frecps z0.d, z1.d, z31.d
+frecps z0.h, z1.h, z31.h
+frecps z0.s, z1.s, z31.s
+frecpx z31.d, p7/m, z31.d
+frecpx z31.h, p7/m, z31.h
+frecpx z31.s, p7/m, z31.s
+frinta z31.d, p7/m, z31.d
+frinta z31.h, p7/m, z31.h
+frinta z31.s, p7/m, z31.s
+frinti z31.d, p7/m, z31.d
+frinti z31.h, p7/m, z31.h
+frinti z31.s, p7/m, z31.s
+frintm z31.d, p7/m, z31.d
+frintm z31.h, p7/m, z31.h
+frintm z31.s, p7/m, z31.s
+frintn z31.d, p7/m, z31.d
+frintn z31.h, p7/m, z31.h
+frintn z31.s, p7/m, z31.s
+frintp z31.d, p7/m, z31.d
+frintp z31.h, p7/m, z31.h
+frintp z31.s, p7/m, z31.s
+frintx z31.d, p7/m, z31.d
+frintx z31.h, p7/m, z31.h
+frintx z31.s, p7/m, z31.s
+frintz z31.d, p7/m, z31.d
+frintz z31.h, p7/m, z31.h
+frintz z31.s, p7/m, z31.s
+frsqrte z0.d, z31.d
+frsqrte z0.h, z31.h
+frsqrte z0.s, z31.s
+frsqrts z0.d, z1.d, z31.d
+frsqrts z0.h, z1.h, z31.h
+frsqrts z0.s, z1.s, z31.s
+fscale z0.d, p7/m, z0.d, z31.d
+fscale z0.h, p7/m, z0.h, z31.h
+fscale z0.s, p7/m, z0.s, z31.s
+fsqrt z31.d, p7/m, z31.d
+fsqrt z31.h, p7/m, z31.h
+fsqrt z31.s, p7/m, z31.s
+fsub z0.d, p0/m, z0.d, #0.5
+fsub z0.d, p7/m, z0.d, z31.d
+fsub z0.d, z1.d, z31.d
+fsub z0.h, p0/m, z0.h, #0.5
+fsub z0.h, p7/m, z0.h, z31.h
+fsub z0.h, z1.h, z31.h
+fsub z0.s, p0/m, z0.s, #0.5
+fsub z0.s, p7/m, z0.s, z31.s
+fsub z0.s, z1.s, z31.s
+fsub z31.d, p7/m, z31.d, #1.0
+fsub z31.h, p7/m, z31.h, #1.0
+fsub z31.s, p7/m, z31.s, #1.0
+fsubr z0.d, p0/m, z0.d, #0.5
+fsubr z0.d, p7/m, z0.d, z31.d
+fsubr z0.h, p0/m, z0.h, #0.5
+fsubr z0.h, p7/m, z0.h, z31.h
+fsubr z0.s, p0/m, z0.s, #0.5
+fsubr z0.s, p7/m, z0.s, z31.s
+fsubr z31.d, p7/m, z31.d, #1.0
+fsubr z31.h, p7/m, z31.h, #1.0
+fsubr z31.s, p7/m, z31.s, #1.0
+ftmad z0.d, z0.d, z31.d, #7
+ftmad z0.h, z0.h, z31.h, #7
+ftmad z0.s, z0.s, z31.s, #7
+ftsmul z0.d, z1.d, z31.d
+ftsmul z0.h, z1.h, z31.h
+ftsmul z0.s, z1.s, z31.s
+ftssel z0.d, z1.d, z31.d
+ftssel z0.h, z1.h, z31.h
+ftssel z0.s, z1.s, z31.s
+histcnt z0.s, p0/z, z1.s, z2.s
+histcnt z29.d, p7/z, z30.d, z31.d
+histseg z0.b, z1.b, z31.b
+incb x0
+incb x0, #14
+incb x0, all, mul #16
+incb x0, pow2
+incb x0, vl1
+incd x0
+incd x0, #14
+incd x0, all, mul #16
+incd x0, pow2
+incd x0, vl1
+incd z0.d
+incd z0.d, all, mul #16
+inch x0
+inch x0, #14
+inch x0, all, mul #16
+inch x0, pow2
+inch x0, vl1
+inch z0.h
+inch z0.h, all, mul #16
+incp x0, p0.b
+incp x0, p0.d
+incp x0, p0.h
+incp x0, p0.s
+incp xzr, p15.b
+incp xzr, p15.d
+incp xzr, p15.h
+incp xzr, p15.s
+incp z31.d, p15.d
+incp z31.h, p15.h
+incp z31.s, p15.s
+incw x0
+incw x0, #14
+incw x0, all, mul #16
+incw x0, pow2
+incw x0, vl1
+incw z0.s
+incw z0.s, all, mul #16
+index z0.b, #0, #0
+index z0.d, #0, #0
+index z0.h, #0, #0
+index z0.h, w0, w0
+index z0.s, #0, #0
+index z21.b, w10, w21
+index z21.d, x10, x21
+index z21.s, w10, w21
+index z23.b, #13, w8
+index z23.b, w13, #8
+index z23.d, #13, x8
+index z23.d, x13, #8
+index z23.h, #13, w8
+index z23.h, w13, #8
+index z23.s, #13, w8
+index z23.s, w13, #8
+index z31.b, #-1, #-1
+index z31.b, #-1, wzr
+index z31.b, wzr, #-1
+index z31.b, wzr, wzr
+index z31.d, #-1, #-1
+index z31.d, #-1, xzr
+index z31.d, xzr, #-1
+index z31.d, xzr, xzr
+index z31.h, #-1, #-1
+index z31.h, #-1, wzr
+index z31.h, wzr, #-1
+index z31.h, wzr, wzr
+index z31.s, #-1, #-1
+index z31.s, #-1, wzr
+index z31.s, wzr, #-1
+index z31.s, wzr, wzr
+insr z0.b, w0
+insr z0.d, x0
+insr z0.h, w0
+insr z0.s, w0
+insr z31.b, b31
+insr z31.b, wzr
+insr z31.d, d31
+insr z31.d, xzr
+insr z31.h, h31
+insr z31.h, wzr
+insr z31.s, s31
+insr z31.s, wzr
+lasta b0, p7, z31.b
+lasta d0, p7, z31.d
+lasta h0, p7, z31.h
+lasta s0, p7, z31.s
+lasta w0, p7, z31.b
+lasta w0, p7, z31.h
+lasta w0, p7, z31.s
+lasta x0, p7, z31.d
+lastb b0, p7, z31.b
+lastb d0, p7, z31.d
+lastb h0, p7, z31.h
+lastb s0, p7, z31.s
+lastb w0, p7, z31.b
+lastb w0, p7, z31.h
+lastb w0, p7, z31.s
+lastb x0, p7, z31.d
+ld1b { z0.b }, p0/z, [sp, x0]
+ld1b { z0.b }, p0/z, [x0, x0]
+ld1b { z0.b }, p0/z, [x0]
+ld1b { z0.d }, p0/z, [x0]
+ld1b { z0.d }, p0/z, [z0.d]
+ld1b { z0.h }, p0/z, [x0]
+ld1b { z0.s }, p0/z, [x0, z0.s, sxtw]
+ld1b { z0.s }, p0/z, [x0, z0.s, uxtw]
+ld1b { z0.s }, p0/z, [x0]
+ld1b { z0.s }, p0/z, [z0.s]
+ld1b { z21.b }, p5/z, [x10, #5, mul vl]
+ld1b { z21.d }, p5/z, [x10, #5, mul vl]
+ld1b { z21.d }, p5/z, [x10, z21.d, sxtw]
+ld1b { z21.d }, p5/z, [x10, z21.d, uxtw]
+ld1b { z21.h }, p5/z, [x10, #5, mul vl]
+ld1b { z21.s }, p5/z, [x10, #5, mul vl]
+ld1b { z21.s }, p5/z, [x10, x21]
+ld1b { z23.d }, p3/z, [x13, x8]
+ld1b { z31.b }, p7/z, [sp, #-1, mul vl]
+ld1b { z31.d }, p7/z, [sp, #-1, mul vl]
+ld1b { z31.d }, p7/z, [sp, z31.d]
+ld1b { z31.d }, p7/z, [z31.d, #31]
+ld1b { z31.h }, p7/z, [sp, #-1, mul vl]
+ld1b { z31.s }, p7/z, [sp, #-1, mul vl]
+ld1b { z31.s }, p7/z, [z31.s, #31]
+ld1b { z5.h }, p3/z, [x17, x16]
+ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
+ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
+ld1d { z0.d }, p0/z, [x0]
+ld1d { z0.d }, p0/z, [z0.d]
+ld1d { z21.d }, p5/z, [x10, #5, mul vl]
+ld1d { z21.d }, p5/z, [x10, z21.d, sxtw]
+ld1d { z21.d }, p5/z, [x10, z21.d, uxtw]
+ld1d { z23.d }, p3/z, [sp, x8, lsl #3]
+ld1d { z23.d }, p3/z, [x13, x8, lsl #3]
+ld1d { z23.d }, p3/z, [x13, z8.d, lsl #3]
+ld1d { z31.d }, p7/z, [sp, #-1, mul vl]
+ld1d { z31.d }, p7/z, [sp, z31.d]
+ld1d { z31.d }, p7/z, [z31.d, #248]
+ld1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
+ld1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
+ld1h { z0.d }, p0/z, [x0]
+ld1h { z0.d }, p0/z, [z0.d]
+ld1h { z0.h }, p0/z, [x0]
+ld1h { z0.s }, p0/z, [x0, z0.s, sxtw]
+ld1h { z0.s }, p0/z, [x0, z0.s, uxtw]
+ld1h { z0.s }, p0/z, [x0]
+ld1h { z0.s }, p0/z, [z0.s]
+ld1h { z21.d }, p5/z, [x10, #5, mul vl]
+ld1h { z21.d }, p5/z, [x10, z21.d, sxtw]
+ld1h { z21.d }, p5/z, [x10, z21.d, uxtw]
+ld1h { z21.h }, p5/z, [x10, #5, mul vl]
+ld1h { z21.s }, p5/z, [x10, #5, mul vl]
+ld1h { z21.s }, p5/z, [x10, x21, lsl #1]
+ld1h { z23.d }, p3/z, [x13, x8, lsl #1]
+ld1h { z23.d }, p3/z, [x13, z8.d, lsl #1]
+ld1h { z31.d }, p7/z, [sp, #-1, mul vl]
+ld1h { z31.d }, p7/z, [sp, z31.d]
+ld1h { z31.d }, p7/z, [z31.d, #62]
+ld1h { z31.h }, p7/z, [sp, #-1, mul vl]
+ld1h { z31.s }, p7/z, [sp, #-1, mul vl]
+ld1h { z31.s }, p7/z, [sp, z31.s, sxtw #1]
+ld1h { z31.s }, p7/z, [sp, z31.s, uxtw #1]
+ld1h { z31.s }, p7/z, [z31.s, #62]
+ld1h { z5.h }, p3/z, [sp, x16, lsl #1]
+ld1h { z5.h }, p3/z, [x17, x16, lsl #1]
+ld1rb { z0.b }, p0/z, [x0]
+ld1rb { z0.d }, p0/z, [x0]
+ld1rb { z0.h }, p0/z, [x0]
+ld1rb { z0.s }, p0/z, [x0]
+ld1rb { z31.b }, p7/z, [sp, #63]
+ld1rb { z31.d }, p7/z, [sp, #63]
+ld1rb { z31.h }, p7/z, [sp, #63]
+ld1rb { z31.s }, p7/z, [sp, #63]
+ld1rd { z0.d }, p0/z, [x0]
+ld1rd { z31.d }, p7/z, [sp, #504]
+ld1rh { z0.d }, p0/z, [x0]
+ld1rh { z0.h }, p0/z, [x0]
+ld1rh { z0.s }, p0/z, [x0]
+ld1rh { z31.d }, p7/z, [sp, #126]
+ld1rh { z31.h }, p7/z, [sp, #126]
+ld1rh { z31.s }, p7/z, [sp, #126]
+ld1rqb { z0.b }, p0/z, [x0, x0]
+ld1rqb { z0.b }, p0/z, [x0]
+ld1rqb { z21.b }, p5/z, [x10, #112]
+ld1rqb { z23.b }, p3/z, [x13, #-128]
+ld1rqb { z31.b }, p7/z, [sp, #-16]
+ld1rqd { z0.d }, p0/z, [x0, x0, lsl #3]
+ld1rqd { z0.d }, p0/z, [x0]
+ld1rqd { z23.d }, p3/z, [x13, #-128]
+ld1rqd { z23.d }, p3/z, [x13, #112]
+ld1rqd { z31.d }, p7/z, [sp, #-16]
+ld1rqh { z0.h }, p0/z, [x0, x0, lsl #1]
+ld1rqh { z0.h }, p0/z, [x0]
+ld1rqh { z23.h }, p3/z, [x13, #-128]
+ld1rqh { z23.h }, p3/z, [x13, #112]
+ld1rqh { z31.h }, p7/z, [sp, #-16]
+ld1rqw { z0.s }, p0/z, [x0, x0, lsl #2]
+ld1rqw { z0.s }, p0/z, [x0]
+ld1rqw { z23.s }, p3/z, [x13, #-128]
+ld1rqw { z23.s }, p3/z, [x13, #112]
+ld1rqw { z31.s }, p7/z, [sp, #-16]
+ld1rsb { z0.d }, p0/z, [x0]
+ld1rsb { z0.h }, p0/z, [x0]
+ld1rsb { z0.s }, p0/z, [x0]
+ld1rsb { z31.d }, p7/z, [sp, #63]
+ld1rsb { z31.h }, p7/z, [sp, #63]
+ld1rsb { z31.s }, p7/z, [sp, #63]
+ld1rsh { z0.d }, p0/z, [x0]
+ld1rsh { z0.s }, p0/z, [x0]
+ld1rsh { z31.d }, p7/z, [sp, #126]
+ld1rsh { z31.s }, p7/z, [sp, #126]
+ld1rsw { z0.d }, p0/z, [x0]
+ld1rsw { z31.d }, p7/z, [sp, #252]
+ld1rw { z0.d }, p0/z, [x0]
+ld1rw { z0.s }, p0/z, [x0]
+ld1rw { z31.d }, p7/z, [sp, #252]
+ld1rw { z31.s }, p7/z, [sp, #252]
+ld1sb { z0.d }, p0/z, [x0]
+ld1sb { z0.d }, p0/z, [z0.d]
+ld1sb { z0.h }, p0/z, [sp, x0]
+ld1sb { z0.h }, p0/z, [x0, x0]
+ld1sb { z0.h }, p0/z, [x0]
+ld1sb { z0.s }, p0/z, [x0, z0.s, sxtw]
+ld1sb { z0.s }, p0/z, [x0]
+ld1sb { z0.s }, p0/z, [z0.s]
+ld1sb { z21.d }, p5/z, [x10, #5, mul vl]
+ld1sb { z21.d }, p5/z, [x10, z21.d, sxtw]
+ld1sb { z21.d }, p5/z, [x10, z21.d, uxtw]
+ld1sb { z21.h }, p5/z, [x10, #5, mul vl]
+ld1sb { z21.s }, p5/z, [x10, #5, mul vl]
+ld1sb { z21.s }, p5/z, [x10, x21]
+ld1sb { z23.d }, p3/z, [x13, x8]
+ld1sb { z31.d }, p7/z, [sp, #-1, mul vl]
+ld1sb { z31.d }, p7/z, [sp, z31.d]
+ld1sb { z31.d }, p7/z, [z31.d, #31]
+ld1sb { z31.h }, p7/z, [sp, #-1, mul vl]
+ld1sb { z31.s }, p7/z, [sp, #-1, mul vl]
+ld1sb { z31.s }, p7/z, [z31.s, #31]
+ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1]
+ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
+ld1sh { z0.d }, p0/z, [x0]
+ld1sh { z0.d }, p0/z, [z0.d]
+ld1sh { z0.s }, p0/z, [x0, z0.s, sxtw]
+ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
+ld1sh { z0.s }, p0/z, [x0]
+ld1sh { z0.s }, p0/z, [z0.s]
+ld1sh { z21.d }, p5/z, [x10, #5, mul vl]
+ld1sh { z21.d }, p5/z, [x10, z21.d, sxtw]
+ld1sh { z21.d }, p5/z, [x10, z21.d, uxtw]
+ld1sh { z21.s }, p5/z, [sp, x21, lsl #1]
+ld1sh { z21.s }, p5/z, [x10, #5, mul vl]
+ld1sh { z21.s }, p5/z, [x10, x21, lsl #1]
+ld1sh { z23.d }, p3/z, [x13, x8, lsl #1]
+ld1sh { z23.d }, p3/z, [x13, z8.d, lsl #1]
+ld1sh { z31.d }, p7/z, [sp, #-1, mul vl]
+ld1sh { z31.d }, p7/z, [sp, z31.d]
+ld1sh { z31.d }, p7/z, [z31.d, #62]
+ld1sh { z31.s }, p7/z, [sp, #-1, mul vl]
+ld1sh { z31.s }, p7/z, [sp, z31.s, sxtw #1]
+ld1sh { z31.s }, p7/z, [sp, z31.s, uxtw #1]
+ld1sh { z31.s }, p7/z, [z31.s, #62]
+ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2]
+ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2]
+ld1sw { z0.d }, p0/z, [x0]
+ld1sw { z0.d }, p0/z, [z0.d]
+ld1sw { z21.d }, p5/z, [x10, #5, mul vl]
+ld1sw { z21.d }, p5/z, [x10, z21.d, sxtw]
+ld1sw { z21.d }, p5/z, [x10, z21.d, uxtw]
+ld1sw { z23.d }, p3/z, [sp, x8, lsl #2]
+ld1sw { z23.d }, p3/z, [x13, x8, lsl #2]
+ld1sw { z23.d }, p3/z, [x13, z8.d, lsl #2]
+ld1sw { z31.d }, p7/z, [sp, #-1, mul vl]
+ld1sw { z31.d }, p7/z, [sp, z31.d]
+ld1sw { z31.d }, p7/z, [z31.d, #124]
+ld1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
+ld1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
+ld1w { z0.d }, p0/z, [x0]
+ld1w { z0.d }, p0/z, [z0.d]
+ld1w { z0.s }, p0/z, [x0, z0.s, sxtw]
+ld1w { z0.s }, p0/z, [x0, z0.s, uxtw]
+ld1w { z0.s }, p0/z, [x0]
+ld1w { z0.s }, p0/z, [z0.s]
+ld1w { z21.d }, p5/z, [x10, #5, mul vl]
+ld1w { z21.d }, p5/z, [x10, z21.d, sxtw]
+ld1w { z21.d }, p5/z, [x10, z21.d, uxtw]
+ld1w { z21.s }, p5/z, [sp, x21, lsl #2]
+ld1w { z21.s }, p5/z, [x10, #5, mul vl]
+ld1w { z21.s }, p5/z, [x10, x21, lsl #2]
+ld1w { z23.d }, p3/z, [x13, x8, lsl #2]
+ld1w { z23.d }, p3/z, [x13, z8.d, lsl #2]
+ld1w { z31.d }, p7/z, [sp, #-1, mul vl]
+ld1w { z31.d }, p7/z, [sp, z31.d]
+ld1w { z31.d }, p7/z, [z31.d, #124]
+ld1w { z31.s }, p7/z, [sp, #-1, mul vl]
+ld1w { z31.s }, p7/z, [sp, z31.s, sxtw #2]
+ld1w { z31.s }, p7/z, [sp, z31.s, uxtw #2]
+ld1w { z31.s }, p7/z, [z31.s, #124]
+ld2b { z0.b, z1.b }, p0/z, [x0, x0]
+ld2b { z0.b, z1.b }, p0/z, [x0]
+ld2b { z21.b, z22.b }, p5/z, [x10, #10, mul vl]
+ld2b { z23.b, z24.b }, p3/z, [x13, #-16, mul vl]
+ld2b { z5.b, z6.b }, p3/z, [x17, x16]
+ld2d { z0.d, z1.d }, p0/z, [x0, x0, lsl #3]
+ld2d { z0.d, z1.d }, p0/z, [x0]
+ld2d { z21.d, z22.d }, p5/z, [x10, #10, mul vl]
+ld2d { z23.d, z24.d }, p3/z, [x13, #-16, mul vl]
+ld2d { z5.d, z6.d }, p3/z, [x17, x16, lsl #3]
+ld2h { z0.h, z1.h }, p0/z, [x0, x0, lsl #1]
+ld2h { z0.h, z1.h }, p0/z, [x0]
+ld2h { z21.h, z22.h }, p5/z, [x10, #10, mul vl]
+ld2h { z23.h, z24.h }, p3/z, [x13, #-16, mul vl]
+ld2h { z5.h, z6.h }, p3/z, [x17, x16, lsl #1]
+ld2w { z0.s, z1.s }, p0/z, [x0, x0, lsl #2]
+ld2w { z0.s, z1.s }, p0/z, [x0]
+ld2w { z21.s, z22.s }, p5/z, [x10, #10, mul vl]
+ld2w { z23.s, z24.s }, p3/z, [x13, #-16, mul vl]
+ld2w { z5.s, z6.s }, p3/z, [x17, x16, lsl #2]
+ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x0]
+ld3b { z0.b, z1.b, z2.b }, p0/z, [x0]
+ld3b { z21.b, z22.b, z23.b }, p5/z, [x10, #15, mul vl]
+ld3b { z23.b, z24.b, z25.b }, p3/z, [x13, #-24, mul vl]
+ld3b { z5.b, z6.b, z7.b }, p3/z, [x17, x16]
+ld3d { z0.d, z1.d, z2.d }, p0/z, [x0, x0, lsl #3]
+ld3d { z0.d, z1.d, z2.d }, p0/z, [x0]
+ld3d { z21.d, z22.d, z23.d }, p5/z, [x10, #15, mul vl]
+ld3d { z23.d, z24.d, z25.d }, p3/z, [x13, #-24, mul vl]
+ld3d { z5.d, z6.d, z7.d }, p3/z, [x17, x16, lsl #3]
+ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, x0, lsl #1]
+ld3h { z0.h, z1.h, z2.h }, p0/z, [x0]
+ld3h { z21.h, z22.h, z23.h }, p5/z, [x10, #15, mul vl]
+ld3h { z23.h, z24.h, z25.h }, p3/z, [x13, #-24, mul vl]
+ld3h { z5.h, z6.h, z7.h }, p3/z, [x17, x16, lsl #1]
+ld3w { z0.s, z1.s, z2.s }, p0/z, [x0, x0, lsl #2]
+ld3w { z0.s, z1.s, z2.s }, p0/z, [x0]
+ld3w { z21.s, z22.s, z23.s }, p5/z, [x10, #15, mul vl]
+ld3w { z23.s, z24.s, z25.s }, p3/z, [x13, #-24, mul vl]
+ld3w { z5.s, z6.s, z7.s }, p3/z, [x17, x16, lsl #2]
+ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x0]
+ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0]
+ld4b { z21.b, z22.b, z23.b, z24.b }, p5/z, [x10, #20, mul vl]
+ld4b { z23.b, z24.b, z25.b, z26.b }, p3/z, [x13, #-32, mul vl]
+ld4b { z5.b, z6.b, z7.b, z8.b }, p3/z, [x17, x16]
+ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0, x0, lsl #3]
+ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0]
+ld4d { z21.d, z22.d, z23.d, z24.d }, p5/z, [x10, #20, mul vl]
+ld4d { z23.d, z24.d, z25.d, z26.d }, p3/z, [x13, #-32, mul vl]
+ld4d { z5.d, z6.d, z7.d, z8.d }, p3/z, [x17, x16, lsl #3]
+ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, x0, lsl #1]
+ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0]
+ld4h { z21.h, z22.h, z23.h, z24.h }, p5/z, [x10, #20, mul vl]
+ld4h { z23.h, z24.h, z25.h, z26.h }, p3/z, [x13, #-32, mul vl]
+ld4h { z5.h, z6.h, z7.h, z8.h }, p3/z, [x17, x16, lsl #1]
+ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0, x0, lsl #2]
+ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0]
+ld4w { z21.s, z22.s, z23.s, z24.s }, p5/z, [x10, #20, mul vl]
+ld4w { z23.s, z24.s, z25.s, z26.s }, p3/z, [x13, #-32, mul vl]
+ld4w { z5.s, z6.s, z7.s, z8.s }, p3/z, [x17, x16, lsl #2]
+ldff1b { z0.d }, p0/z, [x0, x0]
+ldff1b { z0.d }, p0/z, [z0.d]
+ldff1b { z0.h }, p0/z, [x0, x0]
+ldff1b { z0.s }, p0/z, [x0, x0]
+ldff1b { z0.s }, p0/z, [x0, z0.s, sxtw]
+ldff1b { z0.s }, p0/z, [x0, z0.s, uxtw]
+ldff1b { z0.s }, p0/z, [z0.s]
+ldff1b { z21.d }, p5/z, [x10, z21.d, sxtw]
+ldff1b { z21.d }, p5/z, [x10, z21.d, uxtw]
+ldff1b { z31.b }, p7/z, [sp]
+ldff1b { z31.d }, p7/z, [sp, z31.d]
+ldff1b { z31.d }, p7/z, [sp]
+ldff1b { z31.d }, p7/z, [z31.d, #31]
+ldff1b { z31.h }, p7/z, [sp]
+ldff1b { z31.s }, p7/z, [sp]
+ldff1b { z31.s }, p7/z, [z31.s, #31]
+ldff1d { z0.d }, p0/z, [x0, x0, lsl #3]
+ldff1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
+ldff1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
+ldff1d { z0.d }, p0/z, [z0.d]
+ldff1d { z21.d }, p5/z, [x10, z21.d, sxtw]
+ldff1d { z21.d }, p5/z, [x10, z21.d, uxtw]
+ldff1d { z23.d }, p3/z, [x13, z8.d, lsl #3]
+ldff1d { z31.d }, p7/z, [sp, z31.d]
+ldff1d { z31.d }, p7/z, [sp]
+ldff1d { z31.d }, p7/z, [z31.d, #248]
+ldff1h { z0.d }, p0/z, [x0, x0, lsl #1]
+ldff1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
+ldff1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
+ldff1h { z0.d }, p0/z, [z0.d]
+ldff1h { z0.h }, p0/z, [x0, x0, lsl #1]
+ldff1h { z0.s }, p0/z, [x0, x0, lsl #1]
+ldff1h { z0.s }, p0/z, [x0, z0.s, sxtw]
+ldff1h { z0.s }, p0/z, [x0, z0.s, uxtw]
+ldff1h { z0.s }, p0/z, [z0.s]
+ldff1h { z21.d }, p5/z, [x10, z21.d, sxtw]
+ldff1h { z21.d }, p5/z, [x10, z21.d, uxtw]
+ldff1h { z23.d }, p3/z, [x13, z8.d, lsl #1]
+ldff1h { z31.d }, p7/z, [sp, z31.d]
+ldff1h { z31.d }, p7/z, [sp]
+ldff1h { z31.d }, p7/z, [z31.d, #62]
+ldff1h { z31.h }, p7/z, [sp]
+ldff1h { z31.s }, p7/z, [sp, z31.s, sxtw #1]
+ldff1h { z31.s }, p7/z, [sp, z31.s, uxtw #1]
+ldff1h { z31.s }, p7/z, [sp]
+ldff1h { z31.s }, p7/z, [z31.s, #62]
+ldff1sb { z0.d }, p0/z, [x0, x0]
+ldff1sb { z0.d }, p0/z, [z0.d]
+ldff1sb { z0.h }, p0/z, [x0, x0]
+ldff1sb { z0.s }, p0/z, [x0, x0]
+ldff1sb { z0.s }, p0/z, [x0, z0.s, sxtw]
+ldff1sb { z0.s }, p0/z, [x0, z0.s, uxtw]
+ldff1sb { z0.s }, p0/z, [z0.s]
+ldff1sb { z21.d }, p5/z, [x10, z21.d, sxtw]
+ldff1sb { z21.d }, p5/z, [x10, z21.d, uxtw]
+ldff1sb { z31.d }, p7/z, [sp, z31.d]
+ldff1sb { z31.d }, p7/z, [sp]
+ldff1sb { z31.d }, p7/z, [z31.d, #31]
+ldff1sb { z31.h }, p7/z, [sp]
+ldff1sb { z31.s }, p7/z, [sp]
+ldff1sb { z31.s }, p7/z, [z31.s, #31]
+ldff1sh { z0.d }, p0/z, [x0, x0, lsl #1]
+ldff1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1]
+ldff1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
+ldff1sh { z0.d }, p0/z, [z0.d]
+ldff1sh { z0.s }, p0/z, [x0, x0, lsl #1]
+ldff1sh { z0.s }, p0/z, [x0, z0.s, sxtw]
+ldff1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
+ldff1sh { z0.s }, p0/z, [z0.s]
+ldff1sh { z21.d }, p5/z, [x10, z21.d, sxtw]
+ldff1sh { z21.d }, p5/z, [x10, z21.d, uxtw]
+ldff1sh { z23.d }, p3/z, [x13, z8.d, lsl #1]
+ldff1sh { z31.d }, p7/z, [sp, z31.d]
+ldff1sh { z31.d }, p7/z, [sp]
+ldff1sh { z31.d }, p7/z, [z31.d, #62]
+ldff1sh { z31.s }, p7/z, [sp, z31.s, sxtw #1]
+ldff1sh { z31.s }, p7/z, [sp, z31.s, uxtw #1]
+ldff1sh { z31.s }, p7/z, [sp]
+ldff1sh { z31.s }, p7/z, [z31.s, #62]
+ldff1sw { z0.d }, p0/z, [x0, x0, lsl #2]
+ldff1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2]
+ldff1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2]
+ldff1sw { z0.d }, p0/z, [z0.d]
+ldff1sw { z21.d }, p5/z, [x10, z21.d, sxtw]
+ldff1sw { z21.d }, p5/z, [x10, z21.d, uxtw]
+ldff1sw { z23.d }, p3/z, [x13, z8.d, lsl #2]
+ldff1sw { z31.d }, p7/z, [sp, z31.d]
+ldff1sw { z31.d }, p7/z, [sp]
+ldff1sw { z31.d }, p7/z, [z31.d, #124]
+ldff1w { z0.d }, p0/z, [x0, x0, lsl #2]
+ldff1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
+ldff1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
+ldff1w { z0.d }, p0/z, [z0.d]
+ldff1w { z0.s }, p0/z, [x0, x0, lsl #2]
+ldff1w { z0.s }, p0/z, [x0, z0.s, sxtw]
+ldff1w { z0.s }, p0/z, [x0, z0.s, uxtw]
+ldff1w { z0.s }, p0/z, [z0.s]
+ldff1w { z21.d }, p5/z, [x10, z21.d, sxtw]
+ldff1w { z21.d }, p5/z, [x10, z21.d, uxtw]
+ldff1w { z23.d }, p3/z, [x13, z8.d, lsl #2]
+ldff1w { z31.d }, p7/z, [sp, z31.d]
+ldff1w { z31.d }, p7/z, [sp]
+ldff1w { z31.d }, p7/z, [z31.d, #124]
+ldff1w { z31.s }, p7/z, [sp, z31.s, sxtw #2]
+ldff1w { z31.s }, p7/z, [sp, z31.s, uxtw #2]
+ldff1w { z31.s }, p7/z, [sp]
+ldff1w { z31.s }, p7/z, [z31.s, #124]
+ldnf1b { z0.b }, p0/z, [x0]
+ldnf1b { z0.d }, p0/z, [x0]
+ldnf1b { z0.h }, p0/z, [x0]
+ldnf1b { z0.s }, p0/z, [x0]
+ldnf1b { z21.b }, p5/z, [x10, #5, mul vl]
+ldnf1b { z21.d }, p5/z, [x10, #5, mul vl]
+ldnf1b { z21.h }, p5/z, [x10, #5, mul vl]
+ldnf1b { z21.s }, p5/z, [x10, #5, mul vl]
+ldnf1b { z31.b }, p7/z, [sp, #-1, mul vl]
+ldnf1b { z31.d }, p7/z, [sp, #-1, mul vl]
+ldnf1b { z31.h }, p7/z, [sp, #-1, mul vl]
+ldnf1b { z31.s }, p7/z, [sp, #-1, mul vl]
+ldnf1d { z0.d }, p0/z, [x0]
+ldnf1d { z21.d }, p5/z, [x10, #5, mul vl]
+ldnf1d { z31.d }, p7/z, [sp, #-1, mul vl]
+ldnf1h { z0.d }, p0/z, [x0]
+ldnf1h { z0.h }, p0/z, [x0]
+ldnf1h { z0.s }, p0/z, [x0]
+ldnf1h { z21.d }, p5/z, [x10, #5, mul vl]
+ldnf1h { z21.h }, p5/z, [x10, #5, mul vl]
+ldnf1h { z21.s }, p5/z, [x10, #5, mul vl]
+ldnf1h { z31.d }, p7/z, [sp, #-1, mul vl]
+ldnf1h { z31.h }, p7/z, [sp, #-1, mul vl]
+ldnf1h { z31.s }, p7/z, [sp, #-1, mul vl]
+ldnf1sb { z0.d }, p0/z, [x0]
+ldnf1sb { z0.h }, p0/z, [x0]
+ldnf1sb { z0.s }, p0/z, [x0]
+ldnf1sb { z21.d }, p5/z, [x10, #5, mul vl]
+ldnf1sb { z21.h }, p5/z, [x10, #5, mul vl]
+ldnf1sb { z21.s }, p5/z, [x10, #5, mul vl]
+ldnf1sb { z31.d }, p7/z, [sp, #-1, mul vl]
+ldnf1sb { z31.h }, p7/z, [sp, #-1, mul vl]
+ldnf1sb { z31.s }, p7/z, [sp, #-1, mul vl]
+ldnf1sh { z0.d }, p0/z, [x0]
+ldnf1sh { z0.s }, p0/z, [x0]
+ldnf1sh { z21.d }, p5/z, [x10, #5, mul vl]
+ldnf1sh { z21.s }, p5/z, [x10, #5, mul vl]
+ldnf1sh { z31.d }, p7/z, [sp, #-1, mul vl]
+ldnf1sh { z31.s }, p7/z, [sp, #-1, mul vl]
+ldnf1sw { z0.d }, p0/z, [x0]
+ldnf1sw { z21.d }, p5/z, [x10, #5, mul vl]
+ldnf1sw { z31.d }, p7/z, [sp, #-1, mul vl]
+ldnf1w { z0.d }, p0/z, [x0]
+ldnf1w { z0.s }, p0/z, [x0]
+ldnf1w { z21.d }, p5/z, [x10, #5, mul vl]
+ldnf1w { z21.s }, p5/z, [x10, #5, mul vl]
+ldnf1w { z31.d }, p7/z, [sp, #-1, mul vl]
+ldnf1w { z31.s }, p7/z, [sp, #-1, mul vl]
+ldnt1b { z0.b }, p0/z, [x0, x0]
+ldnt1b { z0.b }, p0/z, [x0]
+ldnt1b { z0.d }, p0/z, [z1.d]
+ldnt1b { z0.s }, p0/z, [z1.s]
+ldnt1b { z21.b }, p5/z, [x10, #7, mul vl]
+ldnt1b { z23.b }, p3/z, [x13, #-8, mul vl]
+ldnt1b { z31.d }, p7/z, [z31.d, x0]
+ldnt1b { z31.d }, p7/z, [z31.d]
+ldnt1b { z31.s }, p7/z, [z31.s, x0]
+ldnt1b { z31.s }, p7/z, [z31.s]
+ldnt1d { z0.d }, p0/z, [x0, x0, lsl #3]
+ldnt1d { z0.d }, p0/z, [x0]
+ldnt1d { z0.d }, p0/z, [z1.d]
+ldnt1d { z21.d }, p5/z, [x10, #7, mul vl]
+ldnt1d { z23.d }, p3/z, [x13, #-8, mul vl]
+ldnt1d { z31.d }, p7/z, [z31.d, x0]
+ldnt1d { z31.d }, p7/z, [z31.d]
+ldnt1h { z0.d }, p0/z, [z1.d]
+ldnt1h { z0.h }, p0/z, [x0, x0, lsl #1]
+ldnt1h { z0.h }, p0/z, [x0]
+ldnt1h { z0.s }, p0/z, [z1.s]
+ldnt1h { z21.h }, p5/z, [x10, #7, mul vl]
+ldnt1h { z23.h }, p3/z, [x13, #-8, mul vl]
+ldnt1h { z31.d }, p7/z, [z31.d, x0]
+ldnt1h { z31.d }, p7/z, [z31.d]
+ldnt1h { z31.s }, p7/z, [z31.s, x0]
+ldnt1h { z31.s }, p7/z, [z31.s]
+ldnt1sb { z0.d }, p0/z, [z1.d]
+ldnt1sb { z0.s }, p0/z, [z1.s]
+ldnt1sb { z31.d }, p7/z, [z31.d, x0]
+ldnt1sb { z31.d }, p7/z, [z31.d]
+ldnt1sb { z31.s }, p7/z, [z31.s, x0]
+ldnt1sb { z31.s }, p7/z, [z31.s]
+ldnt1sh { z0.d }, p0/z, [z1.d]
+ldnt1sh { z0.s }, p0/z, [z1.s]
+ldnt1sh { z31.d }, p7/z, [z31.d, x0]
+ldnt1sh { z31.d }, p7/z, [z31.d]
+ldnt1sh { z31.s }, p7/z, [z31.s, x0]
+ldnt1sh { z31.s }, p7/z, [z31.s]
+ldnt1sw { z0.d }, p0/z, [z1.d]
+ldnt1sw { z31.d }, p7/z, [z31.d, x0]
+ldnt1sw { z31.d }, p7/z, [z31.d]
+ldnt1w { z0.d }, p0/z, [z1.d]
+ldnt1w { z0.s }, p0/z, [x0, x0, lsl #2]
+ldnt1w { z0.s }, p0/z, [x0]
+ldnt1w { z0.s }, p0/z, [z1.s]
+ldnt1w { z21.s }, p5/z, [x10, #7, mul vl]
+ldnt1w { z23.s }, p3/z, [x13, #-8, mul vl]
+ldnt1w { z31.d }, p7/z, [z31.d, x0]
+ldnt1w { z31.d }, p7/z, [z31.d]
+ldnt1w { z31.s }, p7/z, [z31.s, x0]
+ldnt1w { z31.s }, p7/z, [z31.s]
+ldr p0, [x0]
+ldr p5, [x10, #255, mul vl]
+ldr p7, [x13, #-256, mul vl]
+ldr z0, [x0]
+ldr z23, [x13, #255, mul vl]
+ldr z31, [sp, #-256, mul vl]
+lsl z0.b, p0/m, z0.b, #0
+lsl z0.b, p0/m, z0.b, z0.b
+lsl z0.b, p0/m, z0.b, z1.d
+lsl z0.b, z0.b, #0
+lsl z0.b, z1.b, z2.d
+lsl z0.d, p0/m, z0.d, #0
+lsl z0.d, p0/m, z0.d, z0.d
+lsl z0.d, z0.d, #0
+lsl z0.h, p0/m, z0.h, #0
+lsl z0.h, p0/m, z0.h, z0.h
+lsl z0.h, p0/m, z0.h, z1.d
+lsl z0.h, z0.h, #0
+lsl z0.h, z1.h, z2.d
+lsl z0.s, p0/m, z0.s, #0
+lsl z0.s, p0/m, z0.s, z0.s
+lsl z0.s, p0/m, z0.s, z1.d
+lsl z0.s, z0.s, #0
+lsl z0.s, z1.s, z2.d
+lsl z31.b, p0/m, z31.b, #7
+lsl z31.b, z31.b, #7
+lsl z31.d, p0/m, z31.d, #63
+lsl z31.d, z31.d, #63
+lsl z31.h, p0/m, z31.h, #15
+lsl z31.h, z31.h, #15
+lsl z31.s, p0/m, z31.s, #31
+lsl z31.s, z31.s, #31
+lslr z0.b, p0/m, z0.b, z0.b
+lslr z0.d, p0/m, z0.d, z0.d
+lslr z0.h, p0/m, z0.h, z0.h
+lslr z0.s, p0/m, z0.s, z0.s
+lsr z0.b, p0/m, z0.b, #1
+lsr z0.b, p0/m, z0.b, z0.b
+lsr z0.b, p0/m, z0.b, z1.d
+lsr z0.b, z0.b, #1
+lsr z0.b, z1.b, z2.d
+lsr z0.d, p0/m, z0.d, #1
+lsr z0.d, p0/m, z0.d, z0.d
+lsr z0.d, z0.d, #1
+lsr z0.h, p0/m, z0.h, #1
+lsr z0.h, p0/m, z0.h, z0.h
+lsr z0.h, p0/m, z0.h, z1.d
+lsr z0.h, z0.h, #1
+lsr z0.h, z1.h, z2.d
+lsr z0.s, p0/m, z0.s, #1
+lsr z0.s, p0/m, z0.s, z0.s
+lsr z0.s, p0/m, z0.s, z1.d
+lsr z0.s, z0.s, #1
+lsr z0.s, z1.s, z2.d
+lsr z31.b, p0/m, z31.b, #8
+lsr z31.b, z31.b, #8
+lsr z31.d, p0/m, z31.d, #64
+lsr z31.d, z31.d, #64
+lsr z31.h, p0/m, z31.h, #16
+lsr z31.h, z31.h, #16
+lsr z31.s, p0/m, z31.s, #32
+lsr z31.s, z31.s, #32
+lsrr z0.b, p0/m, z0.b, z0.b
+lsrr z0.d, p0/m, z0.d, z0.d
+lsrr z0.h, p0/m, z0.h, z0.h
+lsrr z0.s, p0/m, z0.s, z0.s
+mad z0.b, p7/m, z1.b, z31.b
+mad z0.d, p7/m, z1.d, z31.d
+mad z0.h, p7/m, z1.h, z31.h
+mad z0.s, p7/m, z1.s, z31.s
+match p0.b, p0/z, z0.b, z0.b
+match p0.h, p0/z, z0.h, z0.h
+match p15.b, p7/z, z30.b, z31.b
+match p15.h, p7/z, z30.h, z31.h
+mla z0.b, p7/m, z1.b, z31.b
+mla z0.d, p7/m, z1.d, z31.d
+mla z0.d, z1.d, z7.d[1]
+mla z0.h, p7/m, z1.h, z31.h
+mla z0.h, z1.h, z7.h[7]
+mla z0.s, p7/m, z1.s, z31.s
+mla z0.s, z1.s, z7.s[3]
+mls z0.b, p7/m, z1.b, z31.b
+mls z0.d, p7/m, z1.d, z31.d
+mls z0.d, z1.d, z7.d[1]
+mls z0.h, p7/m, z1.h, z31.h
+mls z0.h, z1.h, z7.h[7]
+mls z0.s, p7/m, z1.s, z31.s
+mls z0.s, z1.s, z7.s[3]
+mov p0.b, p0.b
+mov p0.b, p0/m, p0.b
+mov p0.b, p0/z, p0.b
+mov p15.b, p15.b
+mov p15.b, p15/m, p15.b
+mov p15.b, p15/z, p15.b
+mov z0.b, #127
+mov z0.b, b0
+mov z0.b, p0/m, b0
+mov z0.b, p0/m, w0
+mov z0.b, p0/z, #127
+mov z0.b, w0
+mov z0.d, #0
+mov z0.d, #0xe0000000000003ff
+mov z0.d, #0xffffffffffff7fff
+mov z0.d, #32768
+mov z0.d, d0
+mov z0.d, p0/m, d0
+mov z0.d, p0/m, x0
+mov z0.d, x0
+mov z0.d, z0.d
+mov z0.h, #-256
+mov z0.h, #-32768
+mov z0.h, #0
+mov z0.h, #32512
+mov z0.h, #32767
+mov z0.h, h0
+mov z0.h, p0/m, h0
+mov z0.h, p0/m, w0
+mov z0.h, p0/z, #32512
+mov z0.h, w0
+mov z0.q, q0
+mov z0.s, #0
+mov z0.s, #0xffff7fff
+mov z0.s, #32768
+mov z0.s, p0/m, s0
+mov z0.s, p0/m, w0
+mov z0.s, s0
+mov z0.s, w0
+mov z21.d, #-128
+mov z21.d, #-32768
+mov z21.d, #127
+mov z21.d, #32512
+mov z21.d, p0/z, #-128
+mov z21.d, p0/z, #-32768
+mov z21.d, p0/z, #127
+mov z21.d, p0/z, #32512
+mov z21.d, p15/m, #-128
+mov z21.d, p15/m, #-32768
+mov z21.h, #-128
+mov z21.h, #-32768
+mov z21.h, #127
+mov z21.h, #32512
+mov z21.h, p0/z, #-128
+mov z21.h, p0/z, #-32768
+mov z21.h, p0/z, #127
+mov z21.h, p0/z, #32512
+mov z21.h, p15/m, #-128
+mov z21.h, p15/m, #-32768
+mov z21.s, #-128
+mov z21.s, #-32768
+mov z21.s, #127
+mov z21.s, #32512
+mov z21.s, p0/z, #-128
+mov z21.s, p0/z, #-32768
+mov z21.s, p0/z, #127
+mov z21.s, p0/z, #32512
+mov z21.s, p15/m, #-128
+mov z21.s, p15/m, #-32768
+mov z31.b, p15/m, z31.b
+mov z31.b, p7/m, b31
+movprfx z31, z6
+mov z31.b, p7/m, wsp
+mov z31.b, wsp
+mov z31.b, z31.b[63]
+mov z31.d, p15/m, z31.d
+mov z31.d, p7/m, d31
+movprfx z31.d, p7/z, z6.d
+mov z31.d, p7/m, sp
+mov z31.d, sp
+mov z31.d, z0.d
+mov z31.d, z31.d[7]
+mov z31.h, p15/m, z31.h
+mov z31.h, p7/m, h31
+mov z31.h, p7/m, wsp
+mov z31.h, wsp
+mov z31.h, z31.h[31]
+mov z31.s, p15/m, z31.s
+mov z31.s, p7/m, s31
+mov z31.s, p7/m, wsp
+mov z31.s, wsp
+mov z31.s, z31.s[15]
+mov z5.b, #-1
+mov z5.b, #-128
+mov z5.b, #127
+mov z5.b, p0/z, #-1
+mov z5.b, p0/z, #-128
+mov z5.b, p0/z, #127
+mov z5.b, p15/m, #-128
+mov z5.d, #-6
+mov z5.h, #-6
+mov z5.q, z17.q[3]
+mov z5.s, #-6
+movs p0.b, p0.b
+movs p0.b, p0/z, p0.b
+movs p15.b, p15.b
+movs p15.b, p15/z, p15.b
+mrs x3, ID_AA64ZFR0_EL1
+mrs x3, ZCR_EL1
+mrs x3, ZCR_EL12
+mrs x3, ZCR_EL2
+mrs x3, ZCR_EL3
+msb z0.b, p7/m, z1.b, z31.b
+msb z0.d, p7/m, z1.d, z31.d
+msb z0.h, p7/m, z1.h, z31.h
+msb z0.s, p7/m, z1.s, z31.s
+msr ZCR_EL1, x3
+msr ZCR_EL12, x3
+msr ZCR_EL2, x3
+msr ZCR_EL3, x3
+mul z0.b, p7/m, z0.b, z31.b
+mul z0.b, z1.b, z2.b
+mul z0.d, p7/m, z0.d, z31.d
+mul z0.d, z1.d, z15.d[1]
+mul z0.h, p7/m, z0.h, z31.h
+mul z0.h, z1.h, z2.h
+mul z0.h, z1.h, z7.h[7]
+mul z0.s, p7/m, z0.s, z31.s
+mul z0.s, z1.s, z7.s[3]
+mul z29.s, z30.s, z31.s
+mul z31.b, z31.b, #-128
+mul z31.b, z31.b, #127
+mul z31.d, z31.d, #-128
+mul z31.d, z31.d, #127
+mul z31.d, z31.d, z31.d
+mul z31.h, z31.h, #-128
+mul z31.h, z31.h, #127
+mul z31.s, z31.s, #-128
+mul z31.s, z31.s, #127
+nand p0.b, p0/z, p0.b, p0.b
+nand p15.b, p15/z, p15.b, p15.b
+nands p0.b, p0/z, p0.b, p0.b
+nands p15.b, p15/z, p15.b, p15.b
+nbsl z0.d, z0.d, z1.d, z2.d
+neg z0.b, p0/m, z0.b
+neg z0.d, p0/m, z0.d
+neg z0.h, p0/m, z0.h
+neg z0.s, p0/m, z0.s
+neg z31.b, p7/m, z31.b
+neg z31.d, p7/m, z31.d
+neg z31.h, p7/m, z31.h
+neg z31.s, p7/m, z31.s
+nmatch p0.b, p0/z, z0.b, z0.b
+nmatch p0.h, p0/z, z0.h, z0.h
+nmatch p15.b, p7/z, z30.b, z31.b
+nmatch p15.h, p7/z, z30.h, z31.h
+nor p0.b, p0/z, p0.b, p0.b
+nor p15.b, p15/z, p15.b, p15.b
+nors p0.b, p0/z, p0.b, p0.b
+nors p15.b, p15/z, p15.b, p15.b
+not p0.b, p0/z, p0.b
+not p15.b, p15/z, p15.b
+not z31.b, p7/m, z31.b
+not z31.d, p7/m, z31.d
+not z31.h, p7/m, z31.h
+not z31.s, p7/m, z31.s
+nots p0.b, p0/z, p0.b
+nots p15.b, p15/z, p15.b
+orn p0.b, p0/z, p0.b, p0.b
+orn p15.b, p15/z, p15.b, p15.b
+orns p0.b, p0/z, p0.b, p0.b
+orns p15.b, p15/z, p15.b, p15.b
+orr p0.b, p0/z, p0.b, p1.b
+orr z0.d, z0.d, #0x6
+orr z0.d, z0.d, #0xfffffffffffffff9
+orr z0.s, z0.s, #0x6
+orr z0.s, z0.s, #0xfffffff9
+orr z23.d, z13.d, z8.d
+orr z23.h, z23.h, #0x6
+orr z23.h, z23.h, #0xfff9
+orr z31.b, p7/m, z31.b, z31.b
+orr z31.d, p7/m, z31.d, z31.d
+orr z31.h, p7/m, z31.h, z31.h
+orr z31.s, p7/m, z31.s, z31.s
+orr z5.b, z5.b, #0x6
+orr z5.b, z5.b, #0xf9
+orrs p0.b, p0/z, p0.b, p1.b
+orv b0, p7, z31.b
+orv d0, p7, z31.d
+orv h0, p7, z31.h
+orv s0, p7, z31.s
+pfalse p15.b
+pfirst p0.b, p15, p0.b
+pfirst p15.b, p15, p15.b
+pmul z0.b, z1.b, z2.b
+pmul z29.b, z30.b, z31.b
+pmullb z0.h, z1.b, z2.b
+pmullb z29.q, z30.d, z31.d
+pmullb z31.d, z31.s, z31.s
+pmullt z0.h, z1.b, z2.b
+pmullt z29.q, z30.d, z31.d
+pmullt z31.d, z31.s, z31.s
+pnext p0.b, p15, p0.b
+pnext p0.d, p15, p0.d
+pnext p0.h, p15, p0.h
+pnext p0.s, p15, p0.s
+pnext p15.b, p15, p15.b
+prfb #14, p0, [x0]
+prfb #15, p0, [x0]
+prfb #6, p0, [x0]
+prfb #7, p0, [x0]
+prfb #7, p3, [z13.s, #31]
+prfb #7, p3, [z13.s]
+prfb pldl1keep, p0, [x0, z0.d, uxtw]
+prfb pldl1keep, p0, [x0, z0.d]
+prfb pldl1keep, p0, [x0, z0.s, uxtw]
+prfb pldl1keep, p0, [x0]
+prfb pldl1strm, p0, [x0, #-32, mul vl]
+prfb pldl1strm, p0, [x0, #31, mul vl]
+prfb pldl1strm, p0, [x0]
+prfb pldl2keep, p0, [x0]
+prfb pldl2strm, p0, [x0]
+prfb pldl3keep, p0, [x0]
+prfb pldl3strm, p0, [x0]
+prfb pldl3strm, p5, [x10, z21.d, sxtw]
+prfb pldl3strm, p5, [x10, z21.s, uxtw]
+prfb pldl3strm, p5, [z10.d, #31]
+prfb pldl3strm, p5, [z10.d]
+prfb pstl1keep, p0, [x0]
+prfb pstl1strm, p0, [x0]
+prfb pstl2keep, p0, [x0]
+prfb pstl2strm, p0, [x0]
+prfb pstl3keep, p0, [x0]
+prfb pstl3strm, p0, [x0]
+prfd #14, p0, [x0]
+prfd #15, p0, [x0]
+prfd #15, p7, [z31.d, #248]
+prfd #15, p7, [z31.d]
+prfd #15, p7, [z31.s, #248]
+prfd #15, p7, [z31.s]
+prfd #6, p0, [x0]
+prfd #7, p0, [x0]
+prfd pldl1keep, p0, [x0, z0.d, lsl #3]
+prfd pldl1keep, p0, [x0, z0.d, sxtw #3]
+prfd pldl1keep, p0, [x0, z0.d, uxtw #3]
+prfd pldl1keep, p0, [x0, z0.s, sxtw #3]
+prfd pldl1keep, p0, [x0, z0.s, uxtw #3]
+prfd pldl1keep, p0, [x0]
+prfd pldl1strm, p0, [x0, #-32, mul vl]
+prfd pldl1strm, p0, [x0, #31, mul vl]
+prfd pldl1strm, p0, [x0]
+prfd pldl2keep, p0, [x0]
+prfd pldl2strm, p0, [x0]
+prfd pldl3keep, p0, [x0]
+prfd pldl3strm, p0, [x0]
+prfd pstl1keep, p0, [x0]
+prfd pstl1strm, p0, [x0]
+prfd pstl2keep, p0, [x0]
+prfd pstl2strm, p0, [x0]
+prfd pstl3keep, p0, [x0]
+prfd pstl3strm, p0, [x0]
+prfh #14, p0, [x0]
+prfh #15, p0, [x0]
+prfh #15, p7, [z31.d, #62]
+prfh #15, p7, [z31.d]
+prfh #15, p7, [z31.s, #62]
+prfh #15, p7, [z31.s]
+prfh #6, p0, [x0]
+prfh #7, p0, [x0]
+prfh pldl1keep, p0, [x0, z0.d, lsl #1]
+prfh pldl1keep, p0, [x0]
+prfh pldl1strm, p0, [x0, #-32, mul vl]
+prfh pldl1strm, p0, [x0, #31, mul vl]
+prfh pldl1strm, p0, [x0]
+prfh pldl2keep, p0, [x0]
+prfh pldl2strm, p0, [x0]
+prfh pldl3keep, p0, [x0]
+prfh pldl3strm, p0, [x0]
+prfh pldl3strm, p5, [x10, z21.d, sxtw #1]
+prfh pldl3strm, p5, [x10, z21.d, uxtw #1]
+prfh pldl3strm, p5, [x10, z21.s, sxtw #1]
+prfh pldl3strm, p5, [x10, z21.s, uxtw #1]
+prfh pstl1keep, p0, [x0]
+prfh pstl1strm, p0, [x0]
+prfh pstl2keep, p0, [x0]
+prfh pstl2strm, p0, [x0]
+prfh pstl3keep, p0, [x0]
+prfh pstl3strm, p0, [x0]
+prfw #14, p0, [x0]
+prfw #15, p0, [x0]
+prfw #15, p7, [z31.d, #124]
+prfw #15, p7, [z31.d]
+prfw #15, p7, [z31.s, #124]
+prfw #15, p7, [z31.s]
+prfw #6, p0, [x0]
+prfw #7, p0, [x0]
+prfw #7, p3, [x13, z8.d, uxtw #2]
+prfw pldl1keep, p0, [x0, z0.d, sxtw #2]
+prfw pldl1keep, p0, [x0, z0.s, uxtw #2]
+prfw pldl1keep, p0, [x0]
+prfw pldl1strm, p0, [x0, #-32, mul vl]
+prfw pldl1strm, p0, [x0, #31, mul vl]
+prfw pldl1strm, p0, [x0]
+prfw pldl2keep, p0, [x0]
+prfw pldl2strm, p0, [x0]
+prfw pldl3keep, p0, [x0]
+prfw pldl3strm, p0, [x0]
+prfw pldl3strm, p5, [x10, z21.d, lsl #2]
+prfw pldl3strm, p5, [x10, z21.s, sxtw #2]
+prfw pstl1keep, p0, [x0]
+prfw pstl1strm, p0, [x0]
+prfw pstl2keep, p0, [x0]
+prfw pstl2strm, p0, [x0]
+prfw pstl3keep, p0, [x0]
+prfw pstl3strm, p0, [x0]
+ptest p15, p0.b
+ptest p15, p15.b
+ptrue p0.b, pow2
+ptrue p0.d, pow2
+ptrue p0.h, pow2
+ptrue p0.s, pow2
+ptrue p15.b
+ptrue p15.d
+ptrue p15.h
+ptrue p15.s
+ptrue p7.s
+ptrue p7.s, #14
+ptrue p7.s, #15
+ptrue p7.s, #16
+ptrue p7.s, #17
+ptrue p7.s, #18
+ptrue p7.s, #19
+ptrue p7.s, #20
+ptrue p7.s, #21
+ptrue p7.s, #22
+ptrue p7.s, #23
+ptrue p7.s, #24
+ptrue p7.s, #25
+ptrue p7.s, #26
+ptrue p7.s, #27
+ptrue p7.s, #28
+ptrue p7.s, mul3
+ptrue p7.s, mul4
+ptrue p7.s, vl1
+ptrue p7.s, vl128
+ptrue p7.s, vl16
+ptrue p7.s, vl2
+ptrue p7.s, vl256
+ptrue p7.s, vl3
+ptrue p7.s, vl32
+ptrue p7.s, vl4
+ptrue p7.s, vl5
+ptrue p7.s, vl6
+ptrue p7.s, vl64
+ptrue p7.s, vl7
+ptrue p7.s, vl8
+ptrues p0.b, pow2
+ptrues p0.d, pow2
+ptrues p0.h, pow2
+ptrues p0.s, pow2
+ptrues p15.b
+ptrues p15.d
+ptrues p15.h
+ptrues p15.s
+ptrues p7.s
+ptrues p7.s, #14
+ptrues p7.s, #15
+ptrues p7.s, #16
+ptrues p7.s, #17
+ptrues p7.s, #18
+ptrues p7.s, #19
+ptrues p7.s, #20
+ptrues p7.s, #21
+ptrues p7.s, #22
+ptrues p7.s, #23
+ptrues p7.s, #24
+ptrues p7.s, #25
+ptrues p7.s, #26
+ptrues p7.s, #27
+ptrues p7.s, #28
+ptrues p7.s, mul3
+ptrues p7.s, mul4
+ptrues p7.s, vl1
+ptrues p7.s, vl128
+ptrues p7.s, vl16
+ptrues p7.s, vl2
+ptrues p7.s, vl256
+ptrues p7.s, vl3
+ptrues p7.s, vl32
+ptrues p7.s, vl4
+ptrues p7.s, vl5
+ptrues p7.s, vl6
+ptrues p7.s, vl64
+ptrues p7.s, vl7
+ptrues p7.s, vl8
+punpkhi p0.h, p0.b
+punpkhi p15.h, p15.b
+punpklo p0.h, p0.b
+punpklo p15.h, p15.b
+raddhnb z0.b, z1.h, z31.h
+raddhnb z0.h, z1.s, z31.s
+raddhnb z0.s, z1.d, z31.d
+raddhnt z0.b, z1.h, z31.h
+raddhnt z0.h, z1.s, z31.s
+raddhnt z0.s, z1.d, z31.d
+rax1 z0.d, z1.d, z31.d
+rbit z0.b, p7/m, z31.b
+rbit z0.d, p7/m, z31.d
+rbit z0.h, p7/m, z31.h
+rbit z0.s, p7/m, z31.s
+rdffr p0.b
+rdffr p0.b, p0/z
+rdffr p15.b
+rdffr p15.b, p15/z
+rdffrs p0.b, p0/z
+rdffrs p15.b, p15/z
+rdvl x0, #0
+rdvl x21, #-32
+rdvl x23, #31
+rdvl xzr, #-1
+rev p0.b, p1.b
+rev p0.d, p1.d
+rev p0.h, p1.h
+rev p0.s, p1.s
+rev z0.b, z31.b
+rev z0.d, z31.d
+rev z0.h, z31.h
+rev z0.s, z31.s
+revb z0.d, p7/m, z31.d
+revb z0.h, p7/m, z31.h
+revb z0.s, p7/m, z31.s
+revh z0.d, p7/m, z31.d
+revh z0.s, p7/m, z31.s
+revw z0.d, p7/m, z31.d
+rshrnb z0.b, z0.h, #1
+rshrnb z0.h, z0.s, #1
+rshrnb z0.s, z0.d, #1
+rshrnb z31.b, z31.h, #8
+rshrnb z31.h, z31.s, #16
+rshrnb z31.s, z31.d, #32
+rshrnt z0.b, z0.h, #1
+rshrnt z0.h, z0.s, #1
+rshrnt z0.s, z0.d, #1
+rshrnt z31.b, z31.h, #8
+rshrnt z31.h, z31.s, #16
+rshrnt z31.s, z31.d, #32
+rsubhnb z0.b, z1.h, z31.h
+rsubhnb z0.h, z1.s, z31.s
+rsubhnb z0.s, z1.d, z31.d
+rsubhnt z0.b, z1.h, z31.h
+rsubhnt z0.h, z1.s, z31.s
+rsubhnt z0.s, z1.d, z31.d
+saba z0.b, z1.b, z31.b
+saba z0.d, z1.d, z31.d
+saba z0.h, z1.h, z31.h
+saba z0.s, z1.s, z31.s
+sabalb z0.d, z1.s, z31.s
+sabalb z0.h, z1.b, z31.b
+sabalb z0.s, z1.h, z31.h
+sabalt z0.d, z1.s, z31.s
+sabalt z0.h, z1.b, z31.b
+sabalt z0.s, z1.h, z31.h
+sabd z31.b, p7/m, z31.b, z31.b
+sabd z31.d, p7/m, z31.d, z31.d
+sabd z31.h, p7/m, z31.h, z31.h
+sabd z31.s, p7/m, z31.s, z31.s
+sabdlb z0.h, z1.b, z2.b
+sabdlb z29.s, z30.h, z31.h
+sabdlb z31.d, z31.s, z31.s
+sabdlt z0.h, z1.b, z2.b
+sabdlt z29.s, z30.h, z31.h
+sabdlt z31.d, z31.s, z31.s
+sadalp z0.h, p0/m, z1.b
+sadalp z29.s, p0/m, z30.h
+sadalp z30.d, p7/m, z31.s
+saddlb z0.h, z1.b, z2.b
+saddlb z29.s, z30.h, z31.h
+saddlb z31.d, z31.s, z31.s
+saddlbt z0.d, z1.s, z31.s
+saddlbt z0.h, z1.b, z31.b
+saddlbt z0.s, z1.h, z31.h
+saddlt z0.h, z1.b, z2.b
+saddlt z29.s, z30.h, z31.h
+saddlt z31.d, z31.s, z31.s
+saddv d0, p7, z31.b
+saddv d0, p7, z31.h
+saddv d0, p7, z31.s
+saddwb z0.h, z1.h, z2.b
+saddwb z29.s, z30.s, z31.h
+saddwb z31.d, z31.d, z31.s
+saddwt z0.h, z1.h, z2.b
+saddwt z29.s, z30.s, z31.h
+saddwt z31.d, z31.d, z31.s
+sbclb z0.d, z1.d, z31.d
+sbclb z0.s, z1.s, z31.s
+sbclt z0.d, z1.d, z31.d
+sbclt z0.s, z1.s, z31.s
+scvtf z0.d, p0/m, z0.d
+scvtf z0.d, p0/m, z0.s
+scvtf z0.h, p0/m, z0.d
+scvtf z0.h, p0/m, z0.h
+scvtf z0.h, p0/m, z0.s
+scvtf z0.s, p0/m, z0.d
+scvtf z0.s, p0/m, z0.s
+sdiv z0.d, p7/m, z0.d, z31.d
+sdiv z0.s, p7/m, z0.s, z31.s
+sdivr z0.d, p7/m, z0.d, z31.d
+sdivr z0.s, p7/m, z0.s, z31.s
+sdot z0.d, z1.h, z15.h[1]
+sdot z0.d, z1.h, z31.h
+sdot z0.s, z1.b, z31.b
+sdot z0.s, z1.b, z7.b[3]
+sel p0.b, p1, p2.b, p3.b
+sel z23.b, p11, z13.b, z8.b
+sel z23.d, p11, z13.d, z8.d
+sel z23.h, p11, z13.h, z8.h
+sel z23.s, p11, z13.s, z8.s
+setffr
+shadd z0.b, p0/m, z0.b, z1.b
+shadd z0.h, p0/m, z0.h, z1.h
+shadd z29.s, p7/m, z29.s, z30.s
+shadd z31.d, p7/m, z31.d, z30.d
+shrnb z0.b, z0.h, #1
+shrnb z0.h, z0.s, #1
+shrnb z0.s, z0.d, #1
+shrnb z31.b, z31.h, #8
+shrnb z31.h, z31.s, #16
+shrnb z31.s, z31.d, #32
+shrnt z0.b, z0.h, #1
+shrnt z0.h, z0.s, #1
+shrnt z0.s, z0.d, #1
+shrnt z31.b, z31.h, #8
+shrnt z31.h, z31.s, #16
+shrnt z31.s, z31.d, #32
+shsub z0.b, p0/m, z0.b, z1.b
+shsub z0.h, p0/m, z0.h, z1.h
+shsub z29.s, p7/m, z29.s, z30.s
+shsub z31.d, p7/m, z31.d, z30.d
+shsubr z0.b, p0/m, z0.b, z1.b
+shsubr z0.h, p0/m, z0.h, z1.h
+shsubr z29.s, p7/m, z29.s, z30.s
+shsubr z31.d, p7/m, z31.d, z30.d
+sli z0.b, z0.b, #0
+sli z0.d, z0.d, #0
+sli z0.h, z0.h, #0
+sli z0.s, z0.s, #0
+sli z31.b, z31.b, #7
+sli z31.d, z31.d, #63
+sli z31.h, z31.h, #15
+sli z31.s, z31.s, #31
+sm4e z0.s, z0.s, z31.s
+sm4ekey z0.s, z1.s, z31.s
+smax z0.b, z0.b, #-128
+smax z0.d, z0.d, #-128
+smax z0.h, z0.h, #-128
+smax z0.s, z0.s, #-128
+smax z31.b, p7/m, z31.b, z31.b
+smax z31.b, z31.b, #127
+smax z31.d, p7/m, z31.d, z31.d
+smax z31.d, z31.d, #127
+smax z31.h, p7/m, z31.h, z31.h
+smax z31.h, z31.h, #127
+smax z31.s, p7/m, z31.s, z31.s
+smax z31.s, z31.s, #127
+smaxp z0.b, p0/m, z0.b, z1.b
+smaxp z0.h, p0/m, z0.h, z1.h
+smaxp z29.s, p7/m, z29.s, z30.s
+smaxp z31.d, p7/m, z31.d, z30.d
+smaxv b0, p7, z31.b
+smaxv d0, p7, z31.d
+smaxv h0, p7, z31.h
+smaxv s0, p7, z31.s
+smin z0.b, z0.b, #-128
+smin z0.d, z0.d, #-128
+smin z0.h, z0.h, #-128
+smin z0.s, z0.s, #-128
+smin z31.b, p7/m, z31.b, z31.b
+smin z31.b, z31.b, #127
+smin z31.d, p7/m, z31.d, z31.d
+smin z31.d, z31.d, #127
+smin z31.h, p7/m, z31.h, z31.h
+smin z31.h, z31.h, #127
+smin z31.s, p7/m, z31.s, z31.s
+smin z31.s, z31.s, #127
+sminp z0.b, p0/m, z0.b, z1.b
+sminp z0.h, p0/m, z0.h, z1.h
+sminp z29.s, p7/m, z29.s, z30.s
+sminp z31.d, p7/m, z31.d, z30.d
+sminv b0, p7, z31.b
+sminv d0, p7, z31.d
+sminv h0, p7, z31.h
+sminv s0, p7, z31.s
+smlalb z0.d, z1.s, z15.s[1]
+smlalb z0.d, z1.s, z31.s
+smlalb z0.h, z1.b, z31.b
+smlalb z0.s, z1.h, z31.h
+smlalb z0.s, z1.h, z7.h[7]
+smlalt z0.d, z1.s, z15.s[1]
+smlalt z0.d, z1.s, z31.s
+smlalt z0.h, z1.b, z31.b
+smlalt z0.s, z1.h, z31.h
+smlalt z0.s, z1.h, z7.h[7]
+smlslb z0.d, z1.s, z15.s[1]
+smlslb z0.d, z1.s, z31.s
+smlslb z0.h, z1.b, z31.b
+smlslb z0.s, z1.h, z31.h
+smlslb z0.s, z1.h, z7.h[7]
+smlslt z0.d, z1.s, z15.s[1]
+smlslt z0.d, z1.s, z31.s
+smlslt z0.h, z1.b, z31.b
+smlslt z0.s, z1.h, z31.h
+smlslt z0.s, z1.h, z7.h[7]
+smmla z0.s, z1.b, z2.b
+smulh z0.b, p7/m, z0.b, z31.b
+smulh z0.b, z1.b, z2.b
+smulh z0.d, p7/m, z0.d, z31.d
+smulh z0.h, p7/m, z0.h, z31.h
+smulh z0.h, z1.h, z2.h
+smulh z0.s, p7/m, z0.s, z31.s
+smulh z29.s, z30.s, z31.s
+smulh z31.d, z31.d, z31.d
+smullb z0.d, z1.s, z15.s[1]
+smullb z0.h, z1.b, z2.b
+smullb z0.s, z1.h, z7.h[7]
+smullb z29.s, z30.h, z31.h
+smullb z31.d, z31.s, z31.s
+smullt z0.d, z1.s, z15.s[1]
+smullt z0.h, z1.b, z2.b
+smullt z0.s, z1.h, z7.h[7]
+smullt z29.s, z30.h, z31.h
+smullt z31.d, z31.s, z31.s
+splice z29.b, p7, { z30.b, z31.b }
+splice z29.d, p7, { z30.d, z31.d }
+splice z29.h, p7, { z30.h, z31.h }
+splice z29.s, p7, { z30.s, z31.s }
+splice z31.b, p7, z31.b, z31.b
+splice z31.d, p7, z31.d, z31.d
+splice z31.h, p7, z31.h, z31.h
+splice z31.s, p7, z31.s, z31.s
+sqabs z31.b, p7/m, z31.b
+sqabs z31.d, p7/m, z31.d
+sqabs z31.h, p7/m, z31.h
+sqabs z31.s, p7/m, z31.s
+sqadd z0.b, p0/m, z0.b, z1.b
+sqadd z0.b, z0.b, #0
+sqadd z0.b, z0.b, z0.b
+sqadd z0.d, z0.d, #0
+sqadd z0.d, z0.d, #0, lsl #8
+sqadd z0.d, z0.d, z0.d
+sqadd z0.h, p0/m, z0.h, z1.h
+sqadd z0.h, z0.h, #0
+sqadd z0.h, z0.h, #0, lsl #8
+sqadd z0.h, z0.h, z0.h
+sqadd z0.s, z0.s, #0
+sqadd z0.s, z0.s, #0, lsl #8
+sqadd z0.s, z0.s, z0.s
+sqadd z29.s, p7/m, z29.s, z30.s
+sqadd z31.b, z31.b, #255
+sqadd z31.d, p7/m, z31.d, z30.d
+sqadd z31.d, z31.d, #65280
+sqadd z31.h, z31.h, #65280
+sqadd z31.s, z31.s, #65280
+sqcadd z0.b, z0.b, z0.b, #90
+sqcadd z0.d, z0.d, z0.d, #90
+sqcadd z0.h, z0.h, z0.h, #90
+sqcadd z0.s, z0.s, z0.s, #90
+sqcadd z31.b, z31.b, z31.b, #270
+sqcadd z31.d, z31.d, z31.d, #270
+sqcadd z31.h, z31.h, z31.h, #270
+sqcadd z31.s, z31.s, z31.s, #270
+sqdecb x0
+sqdecb x0, #14
+sqdecb x0, all, mul #16
+sqdecb x0, pow2
+sqdecb x0, vl1
+sqdecb x0, w0
+sqdecb x0, w0, all, mul #16
+sqdecb x0, w0, pow2
+sqdecb x0, w0, pow2, mul #16
+sqdecd x0
+sqdecd x0, #14
+sqdecd x0, all, mul #16
+sqdecd x0, pow2
+sqdecd x0, vl1
+sqdecd x0, w0
+sqdecd x0, w0, all, mul #16
+sqdecd x0, w0, pow2
+sqdecd x0, w0, pow2, mul #16
+sqdecd z0.d
+sqdecd z0.d, all, mul #16
+sqdecd z0.d, pow2
+sqdecd z0.d, pow2, mul #16
+sqdech x0
+sqdech x0, #14
+sqdech x0, all, mul #16
+sqdech x0, pow2
+sqdech x0, vl1
+sqdech x0, w0
+sqdech x0, w0, all, mul #16
+sqdech x0, w0, pow2
+sqdech x0, w0, pow2, mul #16
+sqdech z0.h
+sqdech z0.h, all, mul #16
+sqdech z0.h, pow2
+sqdech z0.h, pow2, mul #16
+sqdecp x0, p0.b
+sqdecp x0, p0.d
+sqdecp x0, p0.h
+sqdecp x0, p0.s
+sqdecp xzr, p15.b, wzr
+sqdecp xzr, p15.d, wzr
+sqdecp xzr, p15.h, wzr
+sqdecp xzr, p15.s, wzr
+sqdecp z0.d, p0.d
+sqdecp z0.h, p0.h
+sqdecp z0.s, p0.s
+sqdecw x0
+sqdecw x0, #14
+sqdecw x0, all, mul #16
+sqdecw x0, pow2
+sqdecw x0, vl1
+sqdecw x0, w0
+sqdecw x0, w0, all, mul #16
+sqdecw x0, w0, pow2
+sqdecw x0, w0, pow2, mul #16
+sqdecw z0.s
+sqdecw z0.s, all, mul #16
+sqdecw z0.s, pow2
+sqdecw z0.s, pow2, mul #16
+sqdmlalb z0.d, z1.s, z15.s[3]
+sqdmlalb z0.d, z1.s, z31.s
+sqdmlalb z0.h, z1.b, z31.b
+sqdmlalb z0.s, z1.h, z31.h
+sqdmlalb z0.s, z1.h, z7.h[7]
+sqdmlalbt z0.d, z1.s, z31.s
+sqdmlalbt z0.h, z1.b, z31.b
+sqdmlalbt z0.s, z1.h, z31.h
+sqdmlalt z0.d, z1.s, z15.s[3]
+sqdmlalt z0.d, z1.s, z31.s
+sqdmlalt z0.h, z1.b, z31.b
+sqdmlalt z0.s, z1.h, z31.h
+sqdmlalt z0.s, z1.h, z7.h[7]
+sqdmlslb z0.d, z1.s, z15.s[3]
+sqdmlslb z0.d, z1.s, z31.s
+sqdmlslb z0.h, z1.b, z31.b
+sqdmlslb z0.s, z1.h, z31.h
+sqdmlslb z0.s, z1.h, z7.h[7]
+sqdmlslbt z0.d, z1.s, z31.s
+sqdmlslbt z0.h, z1.b, z31.b
+sqdmlslbt z0.s, z1.h, z31.h
+sqdmlslt z0.d, z1.s, z15.s[3]
+sqdmlslt z0.d, z1.s, z31.s
+sqdmlslt z0.h, z1.b, z31.b
+sqdmlslt z0.s, z1.h, z31.h
+sqdmlslt z0.s, z1.h, z7.h[7]
+sqdmulh z0.b, z1.b, z2.b
+sqdmulh z0.d, z1.d, z15.d[1]
+sqdmulh z0.h, z1.h, z2.h
+sqdmulh z0.h, z1.h, z7.h[7]
+sqdmulh z0.s, z1.s, z7.s[3]
+sqdmulh z29.s, z30.s, z31.s
+sqdmulh z31.d, z31.d, z31.d
+sqdmullb z0.d, z1.s, z15.s[1]
+sqdmullb z0.h, z1.b, z2.b
+sqdmullb z0.s, z1.h, z7.h[7]
+sqdmullb z29.s, z30.h, z31.h
+sqdmullb z31.d, z31.s, z31.s
+sqdmullt z0.d, z1.s, z15.s[1]
+sqdmullt z0.h, z1.b, z2.b
+sqdmullt z0.s, z1.h, z7.h[7]
+sqdmullt z29.s, z30.h, z31.h
+sqdmullt z31.d, z31.s, z31.s
+sqincb x0
+sqincb x0, #14
+sqincb x0, all, mul #16
+sqincb x0, pow2
+sqincb x0, vl1
+sqincb x0, w0
+sqincb x0, w0, all, mul #16
+sqincb x0, w0, pow2
+sqincb x0, w0, pow2, mul #16
+sqincd x0
+sqincd x0, #14
+sqincd x0, all, mul #16
+sqincd x0, pow2
+sqincd x0, vl1
+sqincd x0, w0
+sqincd x0, w0, all, mul #16
+sqincd x0, w0, pow2
+sqincd x0, w0, pow2, mul #16
+sqincd z0.d
+sqincd z0.d, all, mul #16
+sqincd z0.d, pow2
+sqincd z0.d, pow2, mul #16
+sqinch x0
+sqinch x0, #14
+sqinch x0, all, mul #16
+sqinch x0, pow2
+sqinch x0, vl1
+sqinch x0, w0
+sqinch x0, w0, all, mul #16
+sqinch x0, w0, pow2
+sqinch x0, w0, pow2, mul #16
+sqinch z0.h
+sqinch z0.h, all, mul #16
+sqinch z0.h, pow2
+sqinch z0.h, pow2, mul #16
+sqincp x0, p0.b
+sqincp x0, p0.d
+sqincp x0, p0.h
+sqincp x0, p0.s
+sqincp xzr, p15.b, wzr
+sqincp xzr, p15.d, wzr
+sqincp xzr, p15.h, wzr
+sqincp xzr, p15.s, wzr
+sqincp z0.d, p0.d
+sqincp z0.h, p0.h
+sqincp z0.s, p0.s
+sqincw x0
+sqincw x0, #14
+sqincw x0, all, mul #16
+sqincw x0, pow2
+sqincw x0, vl1
+sqincw x0, w0
+sqincw x0, w0, all, mul #16
+sqincw x0, w0, pow2
+sqincw x0, w0, pow2, mul #16
+sqincw z0.s
+sqincw z0.s, all, mul #16
+sqincw z0.s, pow2
+sqincw z0.s, pow2, mul #16
+sqneg z31.b, p7/m, z31.b
+sqneg z31.d, p7/m, z31.d
+sqneg z31.h, p7/m, z31.h
+sqneg z31.s, p7/m, z31.s
+sqrdcmlah z0.b, z1.b, z2.b, #0
+sqrdcmlah z0.d, z1.d, z2.d, #0
+sqrdcmlah z0.h, z1.h, z2.h, #0
+sqrdcmlah z0.h, z1.h, z2.h[0], #0
+sqrdcmlah z0.s, z1.s, z2.s, #0
+sqrdcmlah z0.s, z1.s, z2.s[0], #0
+sqrdcmlah z15.b, z16.b, z17.b, #270
+sqrdcmlah z15.d, z16.d, z17.d, #270
+sqrdcmlah z15.h, z16.h, z17.h, #270
+sqrdcmlah z15.s, z16.s, z17.s, #270
+sqrdcmlah z29.b, z30.b, z31.b, #90
+sqrdcmlah z29.d, z30.d, z31.d, #90
+sqrdcmlah z29.h, z30.h, z31.h, #90
+sqrdcmlah z29.s, z30.s, z31.s, #90
+sqrdcmlah z31.b, z31.b, z31.b, #180
+sqrdcmlah z31.d, z31.d, z31.d, #180
+sqrdcmlah z31.h, z30.h, z7.h[0], #180
+sqrdcmlah z31.h, z31.h, z31.h, #180
+sqrdcmlah z31.s, z30.s, z7.s[0], #180
+sqrdcmlah z31.s, z31.s, z31.s, #180
+sqrdmlah z0.b, z1.b, z31.b
+sqrdmlah z0.d, z1.d, z15.d[1]
+sqrdmlah z0.d, z1.d, z31.d
+sqrdmlah z0.h, z1.h, z31.h
+sqrdmlah z0.h, z1.h, z7.h[7]
+sqrdmlah z0.s, z1.s, z31.s
+sqrdmlah z0.s, z1.s, z7.s[3]
+sqrdmlsh z0.b, z1.b, z31.b
+sqrdmlsh z0.d, z1.d, z15.d[1]
+sqrdmlsh z0.d, z1.d, z31.d
+sqrdmlsh z0.h, z1.h, z31.h
+sqrdmlsh z0.h, z1.h, z7.h[7]
+sqrdmlsh z0.s, z1.s, z31.s
+sqrdmlsh z0.s, z1.s, z7.s[3]
+sqrdmulh z0.b, z1.b, z2.b
+sqrdmulh z0.d, z1.d, z15.d[1]
+sqrdmulh z0.h, z1.h, z2.h
+sqrdmulh z0.h, z1.h, z7.h[7]
+sqrdmulh z0.s, z1.s, z7.s[3]
+sqrdmulh z29.s, z30.s, z31.s
+sqrdmulh z31.d, z31.d, z31.d
+sqrshl z0.b, p0/m, z0.b, z1.b
+sqrshl z0.h, p0/m, z0.h, z1.h
+sqrshl z29.s, p7/m, z29.s, z30.s
+sqrshl z31.d, p7/m, z31.d, z30.d
+sqrshlr z0.b, p0/m, z0.b, z1.b
+sqrshlr z0.h, p0/m, z0.h, z1.h
+sqrshlr z29.s, p7/m, z29.s, z30.s
+sqrshlr z31.d, p7/m, z31.d, z30.d
+sqrshrnb z0.b, z0.h, #1
+sqrshrnb z0.h, z0.s, #1
+sqrshrnb z0.s, z0.d, #1
+sqrshrnb z31.b, z31.h, #8
+sqrshrnb z31.h, z31.s, #16
+sqrshrnb z31.s, z31.d, #32
+sqrshrnt z0.b, z0.h, #1
+sqrshrnt z0.h, z0.s, #1
+sqrshrnt z0.s, z0.d, #1
+sqrshrnt z31.b, z31.h, #8
+sqrshrnt z31.h, z31.s, #16
+sqrshrnt z31.s, z31.d, #32
+sqrshrunb z0.b, z0.h, #1
+sqrshrunb z0.h, z0.s, #1
+sqrshrunb z0.s, z0.d, #1
+sqrshrunb z31.b, z31.h, #8
+sqrshrunb z31.h, z31.s, #16
+sqrshrunb z31.s, z31.d, #32
+sqrshrunt z0.b, z0.h, #1
+sqrshrunt z0.h, z0.s, #1
+sqrshrunt z0.s, z0.d, #1
+sqrshrunt z31.b, z31.h, #8
+sqrshrunt z31.h, z31.s, #16
+sqrshrunt z31.s, z31.d, #32
+sqshl z0.b, p0/m, z0.b, #0
+sqshl z0.b, p0/m, z0.b, z1.b
+sqshl z0.d, p0/m, z0.d, #0
+sqshl z0.h, p0/m, z0.h, #0
+sqshl z0.h, p0/m, z0.h, z1.h
+sqshl z0.s, p0/m, z0.s, #0
+sqshl z29.s, p7/m, z29.s, z30.s
+sqshl z31.b, p0/m, z31.b, #7
+sqshl z31.d, p0/m, z31.d, #63
+sqshl z31.d, p7/m, z31.d, z30.d
+sqshl z31.h, p0/m, z31.h, #15
+sqshl z31.s, p0/m, z31.s, #31
+sqshlr z0.b, p0/m, z0.b, z1.b
+sqshlr z0.h, p0/m, z0.h, z1.h
+sqshlr z29.s, p7/m, z29.s, z30.s
+sqshlr z31.d, p7/m, z31.d, z30.d
+sqshlu z0.b, p0/m, z0.b, #0
+sqshlu z0.d, p0/m, z0.d, #0
+sqshlu z0.h, p0/m, z0.h, #0
+sqshlu z0.s, p0/m, z0.s, #0
+sqshlu z31.b, p0/m, z31.b, #7
+sqshlu z31.d, p0/m, z31.d, #63
+sqshlu z31.h, p0/m, z31.h, #15
+sqshlu z31.s, p0/m, z31.s, #31
+sqshrnb z0.b, z0.h, #1
+sqshrnb z0.h, z0.s, #1
+sqshrnb z0.s, z0.d, #1
+sqshrnb z31.b, z31.h, #8
+sqshrnb z31.h, z31.s, #16
+sqshrnb z31.s, z31.d, #32
+sqshrnt z0.b, z0.h, #1
+sqshrnt z0.h, z0.s, #1
+sqshrnt z0.s, z0.d, #1
+sqshrnt z31.b, z31.h, #8
+sqshrnt z31.h, z31.s, #16
+sqshrnt z31.s, z31.d, #32
+sqshrunb z0.b, z0.h, #1
+sqshrunb z0.h, z0.s, #1
+sqshrunb z0.s, z0.d, #1
+sqshrunb z31.b, z31.h, #8
+sqshrunb z31.h, z31.s, #16
+sqshrunb z31.s, z31.d, #32
+sqshrunt z0.b, z0.h, #1
+sqshrunt z0.h, z0.s, #1
+sqshrunt z0.s, z0.d, #1
+sqshrunt z31.b, z31.h, #8
+sqshrunt z31.h, z31.s, #16
+sqshrunt z31.s, z31.d, #32
+sqsub z0.b, p0/m, z0.b, z1.b
+sqsub z0.b, z0.b, #0
+sqsub z0.b, z0.b, z0.b
+sqsub z0.d, z0.d, #0
+sqsub z0.d, z0.d, #0, lsl #8
+sqsub z0.d, z0.d, z0.d
+sqsub z0.h, p0/m, z0.h, z1.h
+sqsub z0.h, z0.h, #0
+sqsub z0.h, z0.h, #0, lsl #8
+sqsub z0.h, z0.h, z0.h
+sqsub z0.s, z0.s, #0
+sqsub z0.s, z0.s, #0, lsl #8
+sqsub z0.s, z0.s, z0.s
+sqsub z29.s, p7/m, z29.s, z30.s
+sqsub z31.b, z31.b, #255
+sqsub z31.d, p7/m, z31.d, z30.d
+sqsub z31.d, z31.d, #65280
+sqsub z31.h, z31.h, #65280
+sqsub z31.s, z31.s, #65280
+sqsubr z0.b, p0/m, z0.b, z1.b
+sqsubr z0.h, p0/m, z0.h, z1.h
+sqsubr z29.s, p7/m, z29.s, z30.s
+sqsubr z31.d, p7/m, z31.d, z30.d
+sqxtnb z0.b, z31.h
+sqxtnb z0.h, z31.s
+sqxtnb z0.s, z31.d
+sqxtnt z0.b, z31.h
+sqxtnt z0.h, z31.s
+sqxtnt z0.s, z31.d
+sqxtunb z0.b, z31.h
+sqxtunb z0.h, z31.s
+sqxtunb z0.s, z31.d
+sqxtunt z0.b, z31.h
+sqxtunt z0.h, z31.s
+sqxtunt z0.s, z31.d
+srhadd z0.b, p0/m, z0.b, z1.b
+srhadd z0.h, p0/m, z0.h, z1.h
+srhadd z29.s, p7/m, z29.s, z30.s
+srhadd z31.d, p7/m, z31.d, z30.d
+sri z0.b, z0.b, #1
+sri z0.d, z0.d, #1
+sri z0.h, z0.h, #1
+sri z0.s, z0.s, #1
+sri z31.b, z31.b, #8
+sri z31.d, z31.d, #64
+sri z31.h, z31.h, #16
+sri z31.s, z31.s, #32
+srshl z0.b, p0/m, z0.b, z1.b
+srshl z0.h, p0/m, z0.h, z1.h
+srshl z29.s, p7/m, z29.s, z30.s
+srshl z31.d, p7/m, z31.d, z30.d
+srshlr z0.b, p0/m, z0.b, z1.b
+srshlr z0.h, p0/m, z0.h, z1.h
+srshlr z29.s, p7/m, z29.s, z30.s
+srshlr z31.d, p7/m, z31.d, z30.d
+srshr z0.b, p0/m, z0.b, #1
+srshr z0.d, p0/m, z0.d, #1
+srshr z0.h, p0/m, z0.h, #1
+srshr z0.s, p0/m, z0.s, #1
+srshr z31.b, p0/m, z31.b, #8
+srshr z31.d, p0/m, z31.d, #64
+srshr z31.h, p0/m, z31.h, #16
+srshr z31.s, p0/m, z31.s, #32
+srsra z0.b, z0.b, #1
+srsra z0.d, z0.d, #1
+srsra z0.h, z0.h, #1
+srsra z0.s, z0.s, #1
+srsra z31.b, z31.b, #8
+srsra z31.d, z31.d, #64
+srsra z31.h, z31.h, #16
+srsra z31.s, z31.s, #32
+sshllb z0.d, z0.s, #0
+sshllb z0.h, z0.b, #0
+sshllb z0.s, z0.h, #0
+sshllb z31.d, z31.s, #31
+sshllb z31.h, z31.b, #7
+sshllb z31.s, z31.h, #15
+sshllt z0.d, z0.s, #0
+sshllt z0.h, z0.b, #0
+sshllt z0.s, z0.h, #0
+sshllt z31.d, z31.s, #31
+sshllt z31.h, z31.b, #7
+sshllt z31.s, z31.h, #15
+ssra z0.b, z0.b, #1
+ssra z0.d, z0.d, #1
+ssra z0.h, z0.h, #1
+ssra z0.s, z0.s, #1
+ssra z31.b, z31.b, #8
+ssra z31.d, z31.d, #64
+ssra z31.h, z31.h, #16
+ssra z31.s, z31.s, #32
+ssublb z0.h, z1.b, z2.b
+ssublb z29.s, z30.h, z31.h
+ssublb z31.d, z31.s, z31.s
+ssublbt z0.d, z1.s, z31.s
+ssublbt z0.h, z1.b, z31.b
+ssublbt z0.s, z1.h, z31.h
+ssublt z0.h, z1.b, z2.b
+ssublt z29.s, z30.h, z31.h
+ssublt z31.d, z31.s, z31.s
+ssubltb z0.d, z1.s, z31.s
+ssubltb z0.h, z1.b, z31.b
+ssubltb z0.s, z1.h, z31.h
+ssubwb z0.h, z1.h, z2.b
+ssubwb z29.s, z30.s, z31.h
+ssubwb z31.d, z31.d, z31.s
+ssubwt z0.h, z1.h, z2.b
+ssubwt z29.s, z30.s, z31.h
+ssubwt z31.d, z31.d, z31.s
+st1b { z0.b }, p0, [x0, x0]
+st1b { z0.b }, p0, [x0]
+st1b { z0.d }, p0, [x0, x0]
+st1b { z0.d }, p0, [x0, z0.d, sxtw]
+st1b { z0.d }, p0, [x0, z0.d, uxtw]
+st1b { z0.d }, p0, [x0, z0.d]
+st1b { z0.d }, p0, [x0]
+st1b { z0.d }, p7, [z0.d]
+st1b { z0.h }, p0, [x0, x0]
+st1b { z0.h }, p0, [x0]
+st1b { z0.s }, p0, [x0, x0]
+st1b { z0.s }, p0, [x0, z0.s, sxtw]
+st1b { z0.s }, p0, [x0, z0.s, uxtw]
+st1b { z0.s }, p0, [x0]
+st1b { z0.s }, p7, [z0.s]
+st1b { z21.b }, p5, [x10, #5, mul vl]
+st1b { z21.d }, p5, [x10, #5, mul vl]
+st1b { z21.h }, p5, [x10, #5, mul vl]
+st1b { z21.s }, p5, [x10, #5, mul vl]
+st1b { z31.b }, p7, [sp, #-1, mul vl]
+st1b { z31.d }, p7, [sp, #-1, mul vl]
+st1b { z31.d }, p7, [z31.d, #31]
+st1b { z31.h }, p7, [sp, #-1, mul vl]
+st1b { z31.s }, p7, [sp, #-1, mul vl]
+st1b { z31.s }, p7, [z31.s, #31]
+st1d { z0.d }, p0, [x0, x0, lsl #3]
+st1d { z0.d }, p0, [x0, z0.d, lsl #3]
+st1d { z0.d }, p0, [x0, z0.d, sxtw #3]
+st1d { z0.d }, p0, [x0, z0.d, sxtw]
+st1d { z0.d }, p0, [x0, z0.d, uxtw #3]
+st1d { z0.d }, p0, [x0, z0.d, uxtw]
+st1d { z0.d }, p0, [x0, z0.d]
+st1d { z0.d }, p0, [x0]
+st1d { z0.d }, p7, [z0.d]
+st1d { z21.d }, p5, [x10, #5, mul vl]
+st1d { z31.d }, p7, [sp, #-1, mul vl]
+st1d { z31.d }, p7, [z31.d, #248]
+st1h { z0.d }, p0, [x0, x0, lsl #1]
+st1h { z0.d }, p0, [x0, z0.d, lsl #1]
+st1h { z0.d }, p0, [x0, z0.d, sxtw #1]
+st1h { z0.d }, p0, [x0, z0.d, sxtw]
+st1h { z0.d }, p0, [x0, z0.d, uxtw #1]
+st1h { z0.d }, p0, [x0, z0.d, uxtw]
+st1h { z0.d }, p0, [x0, z0.d]
+st1h { z0.d }, p0, [x0]
+st1h { z0.d }, p7, [z0.d]
+st1h { z0.h }, p0, [x0, x0, lsl #1]
+st1h { z0.h }, p0, [x0]
+st1h { z0.s }, p0, [x0, x0, lsl #1]
+st1h { z0.s }, p0, [x0, z0.s, sxtw #1]
+st1h { z0.s }, p0, [x0, z0.s, sxtw]
+st1h { z0.s }, p0, [x0, z0.s, uxtw #1]
+st1h { z0.s }, p0, [x0, z0.s, uxtw]
+st1h { z0.s }, p0, [x0]
+st1h { z0.s }, p7, [z0.s]
+st1h { z21.d }, p5, [x10, #5, mul vl]
+st1h { z21.h }, p5, [x10, #5, mul vl]
+st1h { z21.s }, p5, [x10, #5, mul vl]
+st1h { z31.d }, p7, [sp, #-1, mul vl]
+st1h { z31.d }, p7, [z31.d, #62]
+st1h { z31.h }, p7, [sp, #-1, mul vl]
+st1h { z31.s }, p7, [sp, #-1, mul vl]
+st1h { z31.s }, p7, [z31.s, #62]
+st1w { z0.d }, p0, [x0, x0, lsl #2]
+st1w { z0.d }, p0, [x0, z0.d, lsl #2]
+st1w { z0.d }, p0, [x0, z0.d, sxtw #2]
+st1w { z0.d }, p0, [x0, z0.d, sxtw]
+st1w { z0.d }, p0, [x0, z0.d, uxtw #2]
+st1w { z0.d }, p0, [x0, z0.d, uxtw]
+st1w { z0.d }, p0, [x0, z0.d]
+st1w { z0.d }, p0, [x0]
+st1w { z0.d }, p7, [z0.d]
+st1w { z0.s }, p0, [x0, x0, lsl #2]
+st1w { z0.s }, p0, [x0, z0.s, sxtw #2]
+st1w { z0.s }, p0, [x0, z0.s, sxtw]
+st1w { z0.s }, p0, [x0, z0.s, uxtw #2]
+st1w { z0.s }, p0, [x0, z0.s, uxtw]
+st1w { z0.s }, p0, [x0]
+st1w { z0.s }, p7, [z0.s]
+st1w { z21.d }, p5, [x10, #5, mul vl]
+st1w { z21.s }, p5, [x10, #5, mul vl]
+st1w { z31.d }, p7, [sp, #-1, mul vl]
+st1w { z31.d }, p7, [z31.d, #124]
+st1w { z31.s }, p7, [sp, #-1, mul vl]
+st1w { z31.s }, p7, [z31.s, #124]
+st2b { z0.b, z1.b }, p0, [x0, x0]
+st2b { z0.b, z1.b }, p0, [x0]
+st2b { z21.b, z22.b }, p5, [x10, #10, mul vl]
+st2b { z23.b, z24.b }, p3, [x13, #-16, mul vl]
+st2b { z5.b, z6.b }, p3, [x17, x16]
+st2d { z0.d, z1.d }, p0, [x0, x0, lsl #3]
+st2d { z0.d, z1.d }, p0, [x0]
+st2d { z21.d, z22.d }, p5, [x10, #10, mul vl]
+st2d { z23.d, z24.d }, p3, [x13, #-16, mul vl]
+st2d { z5.d, z6.d }, p3, [x17, x16, lsl #3]
+st2h { z0.h, z1.h }, p0, [x0, x0, lsl #1]
+st2h { z0.h, z1.h }, p0, [x0]
+st2h { z21.h, z22.h }, p5, [x10, #10, mul vl]
+st2h { z23.h, z24.h }, p3, [x13, #-16, mul vl]
+st2h { z5.h, z6.h }, p3, [x17, x16, lsl #1]
+st2w { z0.s, z1.s }, p0, [x0, x0, lsl #2]
+st2w { z0.s, z1.s }, p0, [x0]
+st2w { z21.s, z22.s }, p5, [x10, #10, mul vl]
+st2w { z23.s, z24.s }, p3, [x13, #-16, mul vl]
+st2w { z5.s, z6.s }, p3, [x17, x16, lsl #2]
+st3b { z0.b, z1.b, z2.b }, p0, [x0, x0]
+st3b { z0.b, z1.b, z2.b }, p0, [x0]
+st3b { z21.b, z22.b, z23.b }, p5, [x10, #15, mul vl]
+st3b { z23.b, z24.b, z25.b }, p3, [x13, #-24, mul vl]
+st3b { z5.b, z6.b, z7.b }, p3, [x17, x16]
+st3d { z0.d, z1.d, z2.d }, p0, [x0, x0, lsl #3]
+st3d { z0.d, z1.d, z2.d }, p0, [x0]
+st3d { z21.d, z22.d, z23.d }, p5, [x10, #15, mul vl]
+st3d { z23.d, z24.d, z25.d }, p3, [x13, #-24, mul vl]
+st3d { z5.d, z6.d, z7.d }, p3, [x17, x16, lsl #3]
+st3h { z0.h, z1.h, z2.h }, p0, [x0, x0, lsl #1]
+st3h { z0.h, z1.h, z2.h }, p0, [x0]
+st3h { z21.h, z22.h, z23.h }, p5, [x10, #15, mul vl]
+st3h { z23.h, z24.h, z25.h }, p3, [x13, #-24, mul vl]
+st3h { z5.h, z6.h, z7.h }, p3, [x17, x16, lsl #1]
+st3w { z0.s, z1.s, z2.s }, p0, [x0, x0, lsl #2]
+st3w { z0.s, z1.s, z2.s }, p0, [x0]
+st3w { z21.s, z22.s, z23.s }, p5, [x10, #15, mul vl]
+st3w { z23.s, z24.s, z25.s }, p3, [x13, #-24, mul vl]
+st3w { z5.s, z6.s, z7.s }, p3, [x17, x16, lsl #2]
+st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x0]
+st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0]
+st4b { z21.b, z22.b, z23.b, z24.b }, p5, [x10, #20, mul vl]
+st4b { z23.b, z24.b, z25.b, z26.b }, p3, [x13, #-32, mul vl]
+st4b { z5.b, z6.b, z7.b, z8.b }, p3, [x17, x16]
+st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, x0, lsl #3]
+st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0]
+st4d { z21.d, z22.d, z23.d, z24.d }, p5, [x10, #20, mul vl]
+st4d { z23.d, z24.d, z25.d, z26.d }, p3, [x13, #-32, mul vl]
+st4d { z5.d, z6.d, z7.d, z8.d }, p3, [x17, x16, lsl #3]
+st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, x0, lsl #1]
+st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0]
+st4h { z21.h, z22.h, z23.h, z24.h }, p5, [x10, #20, mul vl]
+st4h { z23.h, z24.h, z25.h, z26.h }, p3, [x13, #-32, mul vl]
+st4h { z5.h, z6.h, z7.h, z8.h }, p3, [x17, x16, lsl #1]
+st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, x0, lsl #2]
+st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0]
+st4w { z21.s, z22.s, z23.s, z24.s }, p5, [x10, #20, mul vl]
+st4w { z23.s, z24.s, z25.s, z26.s }, p3, [x13, #-32, mul vl]
+st4w { z5.s, z6.s, z7.s, z8.s }, p3, [x17, x16, lsl #2]
+stnt1b { z0.b }, p0, [x0, x0]
+stnt1b { z0.b }, p0, [x0]
+stnt1b { z0.d }, p0, [z1.d]
+stnt1b { z0.s }, p0, [z1.s]
+stnt1b { z21.b }, p5, [x10, #7, mul vl]
+stnt1b { z23.b }, p3, [x13, #-8, mul vl]
+stnt1b { z31.d }, p7, [z31.d, x0]
+stnt1b { z31.d }, p7, [z31.d]
+stnt1b { z31.s }, p7, [z31.s, x0]
+stnt1b { z31.s }, p7, [z31.s]
+stnt1d { z0.d }, p0, [x0, x0, lsl #3]
+stnt1d { z0.d }, p0, [x0]
+stnt1d { z0.d }, p0, [z1.d]
+stnt1d { z21.d }, p5, [x10, #7, mul vl]
+stnt1d { z23.d }, p3, [x13, #-8, mul vl]
+stnt1d { z31.d }, p7, [z31.d, x0]
+stnt1d { z31.d }, p7, [z31.d]
+stnt1h { z0.d }, p0, [z1.d]
+stnt1h { z0.h }, p0, [x0, x0, lsl #1]
+stnt1h { z0.h }, p0, [x0]
+stnt1h { z0.s }, p0, [z1.s]
+stnt1h { z21.h }, p5, [x10, #7, mul vl]
+stnt1h { z23.h }, p3, [x13, #-8, mul vl]
+stnt1h { z31.d }, p7, [z31.d, x0]
+stnt1h { z31.d }, p7, [z31.d]
+stnt1h { z31.s }, p7, [z31.s, x0]
+stnt1h { z31.s }, p7, [z31.s]
+stnt1w { z0.d }, p0, [z1.d]
+stnt1w { z0.s }, p0, [x0, x0, lsl #2]
+stnt1w { z0.s }, p0, [x0]
+stnt1w { z0.s }, p0, [z1.s]
+stnt1w { z21.s }, p5, [x10, #7, mul vl]
+stnt1w { z23.s }, p3, [x13, #-8, mul vl]
+stnt1w { z31.d }, p7, [z31.d, x0]
+stnt1w { z31.d }, p7, [z31.d]
+stnt1w { z31.s }, p7, [z31.s, x0]
+stnt1w { z31.s }, p7, [z31.s]
+str p0, [x0]
+str p15, [sp, #-256, mul vl]
+str p5, [x10, #255, mul vl]
+str z0, [x0]
+str z21, [x10, #-256, mul vl]
+str z31, [sp, #255, mul vl]
+sub z0.b, p0/m, z0.b, z0.b
+sub z0.b, z0.b, #0
+sub z0.b, z0.b, z0.b
+sub z0.d, p0/m, z0.d, z0.d
+sub z0.d, z0.d, #0
+sub z0.d, z0.d, #0, lsl #8
+sub z0.d, z0.d, z0.d
+sub z0.h, p0/m, z0.h, z0.h
+sub z0.h, z0.h, #0
+sub z0.h, z0.h, #0, lsl #8
+sub z0.h, z0.h, z0.h
+sub z0.s, p0/m, z0.s, z0.s
+sub z0.s, z0.s, #0
+sub z0.s, z0.s, #0, lsl #8
+sub z0.s, z0.s, z0.s
+sub z21.b, p5/m, z21.b, z10.b
+sub z21.b, z10.b, z21.b
+sub z21.d, p5/m, z21.d, z10.d
+sub z21.d, z10.d, z21.d
+sub z21.h, p5/m, z21.h, z10.h
+sub z21.h, z10.h, z21.h
+sub z21.s, p5/m, z21.s, z10.s
+sub z21.s, z10.s, z21.s
+sub z23.b, p3/m, z23.b, z13.b
+sub z23.b, z13.b, z8.b
+sub z23.d, p3/m, z23.d, z13.d
+sub z23.d, z13.d, z8.d
+sub z23.h, p3/m, z23.h, z13.h
+sub z23.h, z13.h, z8.h
+sub z23.s, p3/m, z23.s, z13.s
+sub z23.s, z13.s, z8.s
+sub z31.b, p7/m, z31.b, z31.b
+sub z31.b, z31.b, #255
+sub z31.b, z31.b, z31.b
+sub z31.d, p7/m, z31.d, z31.d
+sub z31.d, z31.d, #65280
+sub z31.d, z31.d, z31.d
+sub z31.h, p7/m, z31.h, z31.h
+sub z31.h, z31.h, #65280
+sub z31.h, z31.h, z31.h
+sub z31.s, p7/m, z31.s, z31.s
+sub z31.s, z31.s, #65280
+sub z31.s, z31.s, z31.s
+subhnb z0.b, z1.h, z31.h
+subhnb z0.h, z1.s, z31.s
+subhnb z0.s, z1.d, z31.d
+subhnt z0.b, z1.h, z31.h
+subhnt z0.h, z1.s, z31.s
+subhnt z0.s, z1.d, z31.d
+subr z0.b, p0/m, z0.b, z0.b
+subr z0.b, z0.b, #0
+subr z0.d, p0/m, z0.d, z0.d
+subr z0.d, z0.d, #0
+subr z0.d, z0.d, #0, lsl #8
+subr z0.h, p0/m, z0.h, z0.h
+subr z0.h, z0.h, #0
+subr z0.h, z0.h, #0, lsl #8
+subr z0.s, p0/m, z0.s, z0.s
+subr z0.s, z0.s, #0
+subr z0.s, z0.s, #0, lsl #8
+subr z31.b, z31.b, #255
+subr z31.d, z31.d, #65280
+subr z31.h, z31.h, #65280
+subr z31.s, z31.s, #65280
+sunpkhi z31.d, z31.s
+sunpkhi z31.h, z31.b
+sunpkhi z31.s, z31.h
+sunpklo z31.d, z31.s
+sunpklo z31.h, z31.b
+sunpklo z31.s, z31.h
+suqadd z0.b, p0/m, z0.b, z1.b
+suqadd z0.h, p0/m, z0.h, z1.h
+suqadd z29.s, p7/m, z29.s, z30.s
+suqadd z31.d, p7/m, z31.d, z30.d
+sxtb z0.d, p0/m, z0.d
+sxtb z0.h, p0/m, z0.h
+sxtb z0.s, p0/m, z0.s
+sxtb z31.d, p7/m, z31.d
+sxtb z31.h, p7/m, z31.h
+sxtb z31.s, p7/m, z31.s
+sxth z0.d, p0/m, z0.d
+sxth z0.s, p0/m, z0.s
+sxth z31.d, p7/m, z31.d
+sxth z31.s, p7/m, z31.s
+sxtw z0.d, p0/m, z0.d
+sxtw z31.d, p7/m, z31.d
+tbl z28.b, { z29.b, z30.b }, z31.b
+tbl z28.d, { z29.d, z30.d }, z31.d
+tbl z28.h, { z29.h, z30.h }, z31.h
+tbl z28.s, { z29.s, z30.s }, z31.s
+tbl z31.b, { z31.b }, z31.b
+tbl z31.d, { z31.d }, z31.d
+tbl z31.h, { z31.h }, z31.h
+tbl z31.s, { z31.s }, z31.s
+tbx z31.b, z31.b, z31.b
+tbx z31.d, z31.d, z31.d
+tbx z31.h, z31.h, z31.h
+tbx z31.s, z31.s, z31.s
+trn1 p15.b, p15.b, p15.b
+trn1 p15.d, p15.d, p15.d
+trn1 p15.h, p15.h, p15.h
+trn1 p15.s, p15.s, p15.s
+trn1 z31.b, z31.b, z31.b
+trn1 z31.d, z31.d, z31.d
+trn1 z31.h, z31.h, z31.h
+trn1 z31.s, z31.s, z31.s
+trn2 p15.b, p15.b, p15.b
+trn2 p15.d, p15.d, p15.d
+trn2 p15.h, p15.h, p15.h
+trn2 p15.s, p15.s, p15.s
+trn2 z31.b, z31.b, z31.b
+trn2 z31.d, z31.d, z31.d
+trn2 z31.h, z31.h, z31.h
+trn2 z31.s, z31.s, z31.s
+uaba z0.b, z1.b, z31.b
+uaba z0.d, z1.d, z31.d
+uaba z0.h, z1.h, z31.h
+uaba z0.s, z1.s, z31.s
+uabalb z0.d, z1.s, z31.s
+uabalb z0.h, z1.b, z31.b
+uabalb z0.s, z1.h, z31.h
+uabalt z0.d, z1.s, z31.s
+uabalt z0.h, z1.b, z31.b
+uabalt z0.s, z1.h, z31.h
+uabd z31.b, p7/m, z31.b, z31.b
+uabd z31.d, p7/m, z31.d, z31.d
+uabd z31.h, p7/m, z31.h, z31.h
+uabd z31.s, p7/m, z31.s, z31.s
+uabdlb z0.h, z1.b, z2.b
+uabdlb z29.s, z30.h, z31.h
+uabdlb z31.d, z31.s, z31.s
+uabdlt z0.h, z1.b, z2.b
+uabdlt z29.s, z30.h, z31.h
+uabdlt z31.d, z31.s, z31.s
+uadalp z0.h, p0/m, z1.b
+uadalp z29.s, p0/m, z30.h
+uadalp z30.d, p7/m, z31.s
+uaddlb z0.h, z1.b, z2.b
+uaddlb z29.s, z30.h, z31.h
+uaddlb z31.d, z31.s, z31.s
+uaddlt z0.h, z1.b, z2.b
+uaddlt z29.s, z30.h, z31.h
+uaddlt z31.d, z31.s, z31.s
+uaddv d0, p7, z31.b
+uaddv d0, p7, z31.d
+uaddv d0, p7, z31.h
+uaddv d0, p7, z31.s
+uaddwb z0.h, z1.h, z2.b
+uaddwb z29.s, z30.s, z31.h
+uaddwb z31.d, z31.d, z31.s
+uaddwt z0.h, z1.h, z2.b
+uaddwt z29.s, z30.s, z31.h
+uaddwt z31.d, z31.d, z31.s
+ucvtf z0.d, p0/m, z0.d
+ucvtf z0.d, p0/m, z0.s
+ucvtf z0.h, p0/m, z0.d
+ucvtf z0.h, p0/m, z0.h
+ucvtf z0.h, p0/m, z0.s
+ucvtf z0.s, p0/m, z0.d
+ucvtf z0.s, p0/m, z0.s
+udiv z0.d, p7/m, z0.d, z31.d
+udiv z0.s, p7/m, z0.s, z31.s
+udivr z0.d, p7/m, z0.d, z31.d
+udivr z0.s, p7/m, z0.s, z31.s
+udot z0.d, z1.h, z15.h[1]
+udot z0.d, z1.h, z31.h
+udot z0.s, z1.b, z31.b
+udot z0.s, z1.b, z7.b[3]
+uhadd z0.b, p0/m, z0.b, z1.b
+uhadd z0.h, p0/m, z0.h, z1.h
+uhadd z29.s, p7/m, z29.s, z30.s
+uhadd z31.d, p7/m, z31.d, z30.d
+uhsub z0.b, p0/m, z0.b, z1.b
+uhsub z0.h, p0/m, z0.h, z1.h
+uhsub z29.s, p7/m, z29.s, z30.s
+uhsub z31.d, p7/m, z31.d, z30.d
+uhsubr z0.b, p0/m, z0.b, z1.b
+uhsubr z0.h, p0/m, z0.h, z1.h
+uhsubr z29.s, p7/m, z29.s, z30.s
+uhsubr z31.d, p7/m, z31.d, z30.d
+umax z0.b, z0.b, #0
+umax z31.b, p7/m, z31.b, z31.b
+umax z31.b, z31.b, #255
+umax z31.d, p7/m, z31.d, z31.d
+umax z31.h, p7/m, z31.h, z31.h
+umax z31.s, p7/m, z31.s, z31.s
+umaxp z0.b, p0/m, z0.b, z1.b
+umaxp z0.h, p0/m, z0.h, z1.h
+umaxp z29.s, p7/m, z29.s, z30.s
+umaxp z31.d, p7/m, z31.d, z30.d
+umaxv b0, p7, z31.b
+umaxv d0, p7, z31.d
+umaxv h0, p7, z31.h
+umaxv s0, p7, z31.s
+umin z0.b, z0.b, #0
+umin z31.b, p7/m, z31.b, z31.b
+umin z31.b, z31.b, #255
+umin z31.d, p7/m, z31.d, z31.d
+umin z31.h, p7/m, z31.h, z31.h
+umin z31.s, p7/m, z31.s, z31.s
+uminp z0.b, p0/m, z0.b, z1.b
+uminp z0.h, p0/m, z0.h, z1.h
+uminp z29.s, p7/m, z29.s, z30.s
+uminp z31.d, p7/m, z31.d, z30.d
+uminv b0, p7, z31.b
+uminv d0, p7, z31.d
+uminv h0, p7, z31.h
+uminv s0, p7, z31.s
+umlalb z0.d, z1.s, z15.s[1]
+umlalb z0.d, z1.s, z31.s
+umlalb z0.h, z1.b, z31.b
+umlalb z0.s, z1.h, z31.h
+umlalb z0.s, z1.h, z7.h[7]
+umlalt z0.d, z1.s, z15.s[1]
+umlalt z0.d, z1.s, z31.s
+umlalt z0.h, z1.b, z31.b
+umlalt z0.s, z1.h, z31.h
+umlalt z0.s, z1.h, z7.h[7]
+umlslb z0.d, z1.s, z15.s[1]
+umlslb z0.d, z1.s, z31.s
+umlslb z0.h, z1.b, z31.b
+umlslb z0.s, z1.h, z31.h
+umlslb z0.s, z1.h, z7.h[7]
+umlslt z0.d, z1.s, z15.s[1]
+umlslt z0.d, z1.s, z31.s
+umlslt z0.h, z1.b, z31.b
+umlslt z0.s, z1.h, z31.h
+umlslt z0.s, z1.h, z7.h[7]
+ummla z0.s, z1.b, z2.b
+umulh z0.b, p7/m, z0.b, z31.b
+umulh z0.b, z1.b, z2.b
+umulh z0.d, p7/m, z0.d, z31.d
+umulh z0.h, p7/m, z0.h, z31.h
+umulh z0.h, z1.h, z2.h
+umulh z0.s, p7/m, z0.s, z31.s
+umulh z29.s, z30.s, z31.s
+umulh z31.d, z31.d, z31.d
+umullb z0.d, z1.s, z15.s[1]
+umullb z0.h, z1.b, z2.b
+umullb z0.s, z1.h, z7.h[7]
+umullb z29.s, z30.h, z31.h
+umullb z31.d, z31.s, z31.s
+umullt z0.d, z1.s, z15.s[1]
+umullt z0.h, z1.b, z2.b
+umullt z0.s, z1.h, z7.h[7]
+umullt z29.s, z30.h, z31.h
+umullt z31.d, z31.s, z31.s
+uqadd z0.b, p0/m, z0.b, z1.b
+uqadd z0.b, z0.b, #0
+uqadd z0.b, z0.b, z0.b
+uqadd z0.d, z0.d, #0
+uqadd z0.d, z0.d, #0, lsl #8
+uqadd z0.d, z0.d, z0.d
+uqadd z0.h, p0/m, z0.h, z1.h
+uqadd z0.h, z0.h, #0
+uqadd z0.h, z0.h, #0, lsl #8
+uqadd z0.h, z0.h, z0.h
+uqadd z0.s, z0.s, #0
+uqadd z0.s, z0.s, #0, lsl #8
+uqadd z0.s, z0.s, z0.s
+uqadd z29.s, p7/m, z29.s, z30.s
+uqadd z31.b, z31.b, #255
+uqadd z31.d, p7/m, z31.d, z30.d
+uqadd z31.d, z31.d, #65280
+uqadd z31.h, z31.h, #65280
+uqadd z31.s, z31.s, #65280
+uqdecb w0
+uqdecb w0, all, mul #16
+uqdecb w0, pow2
+uqdecb w0, pow2, mul #16
+uqdecb x0
+uqdecb x0, #14
+uqdecb x0, all, mul #16
+uqdecb x0, pow2
+uqdecb x0, vl1
+uqdecd w0
+uqdecd w0, all, mul #16
+uqdecd w0, pow2
+uqdecd w0, pow2, mul #16
+uqdecd x0
+uqdecd x0, #14
+uqdecd x0, all, mul #16
+uqdecd x0, pow2
+uqdecd x0, vl1
+uqdecd z0.d
+uqdecd z0.d, all, mul #16
+uqdecd z0.d, pow2
+uqdecd z0.d, pow2, mul #16
+uqdech w0
+uqdech w0, all, mul #16
+uqdech w0, pow2
+uqdech w0, pow2, mul #16
+uqdech x0
+uqdech x0, #14
+uqdech x0, all, mul #16
+uqdech x0, pow2
+uqdech x0, vl1
+uqdech z0.h
+uqdech z0.h, all, mul #16
+uqdech z0.h, pow2
+uqdech z0.h, pow2, mul #16
+uqdecp wzr, p15.b
+uqdecp wzr, p15.d
+uqdecp wzr, p15.h
+uqdecp wzr, p15.s
+uqdecp x0, p0.b
+uqdecp x0, p0.d
+uqdecp x0, p0.h
+uqdecp x0, p0.s
+uqdecp z0.d, p0.d
+uqdecp z0.h, p0.h
+uqdecp z0.s, p0.s
+uqdecw w0
+uqdecw w0, all, mul #16
+uqdecw w0, pow2
+uqdecw w0, pow2, mul #16
+uqdecw x0
+uqdecw x0, #14
+uqdecw x0, all, mul #16
+uqdecw x0, pow2
+uqdecw x0, vl1
+uqdecw z0.s
+uqdecw z0.s, all, mul #16
+uqdecw z0.s, pow2
+uqdecw z0.s, pow2, mul #16
+uqincb w0
+uqincb w0, all, mul #16
+uqincb w0, pow2
+uqincb w0, pow2, mul #16
+uqincb x0
+uqincb x0, #14
+uqincb x0, all, mul #16
+uqincb x0, pow2
+uqincb x0, vl1
+uqincd w0
+uqincd w0, all, mul #16
+uqincd w0, pow2
+uqincd w0, pow2, mul #16
+uqincd x0
+uqincd x0, #14
+uqincd x0, all, mul #16
+uqincd x0, pow2
+uqincd x0, vl1
+uqincd z0.d
+uqincd z0.d, all, mul #16
+uqincd z0.d, pow2
+uqincd z0.d, pow2, mul #16
+uqinch w0
+uqinch w0, all, mul #16
+uqinch w0, pow2
+uqinch w0, pow2, mul #16
+uqinch x0
+uqinch x0, #14
+uqinch x0, all, mul #16
+uqinch x0, pow2
+uqinch x0, vl1
+uqinch z0.h
+uqinch z0.h, all, mul #16
+uqinch z0.h, pow2
+uqinch z0.h, pow2, mul #16
+uqincp wzr, p15.b
+uqincp wzr, p15.d
+uqincp wzr, p15.h
+uqincp wzr, p15.s
+uqincp x0, p0.b
+uqincp x0, p0.d
+uqincp x0, p0.h
+uqincp x0, p0.s
+uqincp z0.d, p0.d
+uqincp z0.h, p0.h
+uqincp z0.s, p0.s
+uqincw w0
+uqincw w0, all, mul #16
+uqincw w0, pow2
+uqincw w0, pow2, mul #16
+uqincw x0
+uqincw x0, #14
+uqincw x0, all, mul #16
+uqincw x0, pow2
+uqincw x0, vl1
+uqincw z0.s
+uqincw z0.s, all, mul #16
+uqincw z0.s, pow2
+uqincw z0.s, pow2, mul #16
+uqrshl z0.b, p0/m, z0.b, z1.b
+uqrshl z0.h, p0/m, z0.h, z1.h
+uqrshl z29.s, p7/m, z29.s, z30.s
+uqrshl z31.d, p7/m, z31.d, z30.d
+uqrshlr z0.b, p0/m, z0.b, z1.b
+uqrshlr z0.h, p0/m, z0.h, z1.h
+uqrshlr z29.s, p7/m, z29.s, z30.s
+uqrshlr z31.d, p7/m, z31.d, z30.d
+uqrshrnb z0.b, z0.h, #1
+uqrshrnb z0.h, z0.s, #1
+uqrshrnb z0.s, z0.d, #1
+uqrshrnb z31.b, z31.h, #8
+uqrshrnb z31.h, z31.s, #16
+uqrshrnb z31.s, z31.d, #32
+uqrshrnt z0.b, z0.h, #1
+uqrshrnt z0.h, z0.s, #1
+uqrshrnt z0.s, z0.d, #1
+uqrshrnt z31.b, z31.h, #8
+uqrshrnt z31.h, z31.s, #16
+uqrshrnt z31.s, z31.d, #32
+uqshl z0.b, p0/m, z0.b, #0
+uqshl z0.b, p0/m, z0.b, z1.b
+uqshl z0.d, p0/m, z0.d, #0
+uqshl z0.h, p0/m, z0.h, #0
+uqshl z0.h, p0/m, z0.h, z1.h
+uqshl z0.s, p0/m, z0.s, #0
+uqshl z29.s, p7/m, z29.s, z30.s
+uqshl z31.b, p0/m, z31.b, #7
+uqshl z31.d, p0/m, z31.d, #63
+uqshl z31.d, p7/m, z31.d, z30.d
+uqshl z31.h, p0/m, z31.h, #15
+uqshl z31.s, p0/m, z31.s, #31
+uqshlr z0.b, p0/m, z0.b, z1.b
+uqshlr z0.h, p0/m, z0.h, z1.h
+uqshlr z29.s, p7/m, z29.s, z30.s
+uqshlr z31.d, p7/m, z31.d, z30.d
+uqshrnb z0.b, z0.h, #1
+uqshrnb z0.h, z0.s, #1
+uqshrnb z0.s, z0.d, #1
+uqshrnb z31.b, z31.h, #8
+uqshrnb z31.h, z31.s, #16
+uqshrnb z31.s, z31.d, #32
+uqshrnt z0.b, z0.h, #1
+uqshrnt z0.h, z0.s, #1
+uqshrnt z0.s, z0.d, #1
+uqshrnt z31.b, z31.h, #8
+uqshrnt z31.h, z31.s, #16
+uqshrnt z31.s, z31.d, #32
+uqsub z0.b, p0/m, z0.b, z1.b
+uqsub z0.b, z0.b, #0
+uqsub z0.b, z0.b, z0.b
+uqsub z0.d, z0.d, #0
+uqsub z0.d, z0.d, #0, lsl #8
+uqsub z0.d, z0.d, z0.d
+uqsub z0.h, p0/m, z0.h, z1.h
+uqsub z0.h, z0.h, #0
+uqsub z0.h, z0.h, #0, lsl #8
+uqsub z0.h, z0.h, z0.h
+uqsub z0.s, z0.s, #0
+uqsub z0.s, z0.s, #0, lsl #8
+uqsub z0.s, z0.s, z0.s
+uqsub z29.s, p7/m, z29.s, z30.s
+uqsub z31.b, z31.b, #255
+uqsub z31.d, p7/m, z31.d, z30.d
+uqsub z31.d, z31.d, #65280
+uqsub z31.h, z31.h, #65280
+uqsub z31.s, z31.s, #65280
+uqsubr z0.b, p0/m, z0.b, z1.b
+uqsubr z0.h, p0/m, z0.h, z1.h
+uqsubr z29.s, p7/m, z29.s, z30.s
+uqsubr z31.d, p7/m, z31.d, z30.d
+uqxtnb z0.b, z31.h
+uqxtnb z0.h, z31.s
+uqxtnb z0.s, z31.d
+uqxtnt z0.b, z31.h
+uqxtnt z0.h, z31.s
+uqxtnt z0.s, z31.d
+urecpe z31.s, p7/m, z31.s
+urhadd z0.b, p0/m, z0.b, z1.b
+urhadd z0.h, p0/m, z0.h, z1.h
+urhadd z29.s, p7/m, z29.s, z30.s
+urhadd z31.d, p7/m, z31.d, z30.d
+urshl z0.b, p0/m, z0.b, z1.b
+urshl z0.h, p0/m, z0.h, z1.h
+urshl z29.s, p7/m, z29.s, z30.s
+urshl z31.d, p7/m, z31.d, z30.d
+urshlr z0.b, p0/m, z0.b, z1.b
+urshlr z0.h, p0/m, z0.h, z1.h
+urshlr z29.s, p7/m, z29.s, z30.s
+urshlr z31.d, p7/m, z31.d, z30.d
+urshr z0.b, p0/m, z0.b, #1
+urshr z0.d, p0/m, z0.d, #1
+urshr z0.h, p0/m, z0.h, #1
+urshr z0.s, p0/m, z0.s, #1
+urshr z31.b, p0/m, z31.b, #8
+urshr z31.d, p0/m, z31.d, #64
+urshr z31.h, p0/m, z31.h, #16
+urshr z31.s, p0/m, z31.s, #32
+ursqrte z31.s, p7/m, z31.s
+ursra z0.b, z0.b, #1
+ursra z0.d, z0.d, #1
+ursra z0.h, z0.h, #1
+ursra z0.s, z0.s, #1
+ursra z31.b, z31.b, #8
+ursra z31.d, z31.d, #64
+ursra z31.h, z31.h, #16
+ursra z31.s, z31.s, #32
+ushllb z0.d, z0.s, #0
+ushllb z0.h, z0.b, #0
+ushllb z0.s, z0.h, #0
+ushllb z31.d, z31.s, #31
+ushllb z31.h, z31.b, #7
+ushllb z31.s, z31.h, #15
+ushllt z0.d, z0.s, #0
+ushllt z0.h, z0.b, #0
+ushllt z0.s, z0.h, #0
+ushllt z31.d, z31.s, #31
+ushllt z31.h, z31.b, #7
+ushllt z31.s, z31.h, #15
+usmmla z0.s, z1.b, z2.b
+usqadd z0.b, p0/m, z0.b, z1.b
+usqadd z0.h, p0/m, z0.h, z1.h
+usqadd z29.s, p7/m, z29.s, z30.s
+usqadd z31.d, p7/m, z31.d, z30.d
+usra z0.b, z0.b, #1
+usra z0.d, z0.d, #1
+usra z0.h, z0.h, #1
+usra z0.s, z0.s, #1
+usra z31.b, z31.b, #8
+usra z31.d, z31.d, #64
+usra z31.h, z31.h, #16
+usra z31.s, z31.s, #32
+usublb z0.h, z1.b, z2.b
+usublb z29.s, z30.h, z31.h
+usublb z31.d, z31.s, z31.s
+usublt z0.h, z1.b, z2.b
+usublt z29.s, z30.h, z31.h
+usublt z31.d, z31.s, z31.s
+usubwb z0.h, z1.h, z2.b
+usubwb z29.s, z30.s, z31.h
+usubwb z31.d, z31.d, z31.s
+usubwt z0.h, z1.h, z2.b
+usubwt z29.s, z30.s, z31.h
+usubwt z31.d, z31.d, z31.s
+uunpkhi z31.d, z31.s
+uunpkhi z31.h, z31.b
+uunpkhi z31.s, z31.h
+uunpklo z31.d, z31.s
+uunpklo z31.h, z31.b
+uunpklo z31.s, z31.h
+uxtb z0.d, p0/m, z0.d
+uxtb z0.h, p0/m, z0.h
+uxtb z0.s, p0/m, z0.s
+uxtb z31.d, p7/m, z31.d
+uxtb z31.h, p7/m, z31.h
+uxtb z31.s, p7/m, z31.s
+uxth z0.d, p0/m, z0.d
+uxth z0.s, p0/m, z0.s
+uxth z31.d, p7/m, z31.d
+uxth z31.s, p7/m, z31.s
+uxtw z0.d, p0/m, z0.d
+uxtw z31.d, p7/m, z31.d
+uzp1 p15.b, p15.b, p15.b
+uzp1 p15.d, p15.d, p15.d
+uzp1 p15.h, p15.h, p15.h
+uzp1 p15.s, p15.s, p15.s
+uzp1 z31.b, z31.b, z31.b
+uzp1 z31.d, z31.d, z31.d
+uzp1 z31.h, z31.h, z31.h
+uzp1 z31.s, z31.s, z31.s
+uzp2 p15.b, p15.b, p15.b
+uzp2 p15.d, p15.d, p15.d
+uzp2 p15.h, p15.h, p15.h
+uzp2 p15.s, p15.s, p15.s
+uzp2 z31.b, z31.b, z31.b
+uzp2 z31.d, z31.d, z31.d
+uzp2 z31.h, z31.h, z31.h
+uzp2 z31.s, z31.s, z31.s
+whilege p15.b, w0, wzr
+whilege p15.b, wzr, w0
+whilege p15.b, x0, xzr
+whilege p15.b, xzr, x0
+whilege p15.d, w0, wzr
+whilege p15.d, x0, xzr
+whilege p15.h, w0, wzr
+whilege p15.h, x0, xzr
+whilege p15.s, w0, wzr
+whilege p15.s, x0, xzr
+whilerw p15.b, x30, x30
+whilerw p15.d, x30, x30
+whilerw p15.h, x30, x30
+whilerw p15.s, x30, x30
+whilewr p15.b, x30, x30
+whilewr p15.d, x30, x30
+whilewr p15.h, x30, x30
+whilewr p15.s, x30, x30
+wrffr p0.b
+wrffr p15.b
+xar z0.b, z0.b, z1.b, #1
+xar z0.d, z0.d, z1.d, #1
+xar z0.h, z0.h, z1.h, #1
+xar z0.s, z0.s, z1.s, #1
+xar z31.b, z31.b, z30.b, #8
+xar z31.d, z31.d, z30.d, #64
+xar z31.h, z31.h, z30.h, #16
+xar z31.s, z31.s, z30.s, #32
+zip1 p0.b, p0.b, p0.b
+zip1 p0.d, p0.d, p0.d
+zip1 p0.h, p0.h, p0.h
+zip1 p0.s, p0.s, p0.s
+zip1 p15.b, p15.b, p15.b
+zip1 p15.d, p15.d, p15.d
+zip1 p15.h, p15.h, p15.h
+zip1 p15.s, p15.s, p15.s
+zip1 z0.b, z0.b, z0.b
+zip1 z0.d, z0.d, z0.d
+zip1 z0.h, z0.h, z0.h
+zip1 z0.s, z0.s, z0.s
+zip1 z31.b, z31.b, z31.b
+zip1 z31.d, z31.d, z31.d
+zip1 z31.h, z31.h, z31.h
+zip1 z31.s, z31.s, z31.s
+zip2 p0.b, p0.b, p0.b
+zip2 p0.d, p0.d, p0.d
+zip2 p0.h, p0.h, p0.h
+zip2 p0.s, p0.s, p0.s
+zip2 p15.b, p15.b, p15.b
+zip2 p15.d, p15.d, p15.d
+zip2 p15.h, p15.h, p15.h
+zip2 p15.s, p15.s, p15.s
+zip2 z0.b, z0.b, z0.b
+zip2 z0.d, z0.d, z0.d
+zip2 z0.h, z0.h, z0.h
+zip2 z0.s, z0.s, z0.s
+zip2 z31.b, z31.b, z31.b
+zip2 z31.d, z31.d, z31.d
+zip2 z31.h, z31.h, z31.h
+zip2 z31.s, z31.s, z31.s
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 2 0.50 abs z0.b, p0/m, z0.b
+# CHECK-NEXT: 1 2 0.50 abs z0.d, p0/m, z0.d
+# CHECK-NEXT: 1 2 0.50 abs z0.h, p0/m, z0.h
+# CHECK-NEXT: 1 2 0.50 abs z0.s, p0/m, z0.s
+# CHECK-NEXT: 1 2 0.50 abs z31.b, p7/m, z31.b
+# CHECK-NEXT: 1 2 0.50 abs z31.d, p7/m, z31.d
+# CHECK-NEXT: 1 2 0.50 abs z31.h, p7/m, z31.h
+# CHECK-NEXT: 1 2 0.50 abs z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 2 0.50 adclb z0.d, z1.d, z31.d
+# CHECK-NEXT: 1 2 0.50 adclb z0.s, z1.s, z31.s
+# CHECK-NEXT: 1 2 0.50 adclt z0.d, z1.d, z31.d
+# CHECK-NEXT: 1 2 0.50 adclt z0.s, z1.s, z31.s
+# CHECK-NEXT: 1 2 0.50 add z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: 1 2 0.50 add z0.b, z0.b, #0
+# CHECK-NEXT: 1 2 0.50 add z0.b, z0.b, z0.b
+# CHECK-NEXT: 1 2 0.50 add z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: 1 2 0.50 add z0.d, z0.d, #0
+# CHECK-NEXT: 1 2 0.50 add z0.d, z0.d, #0, lsl #8
+# CHECK-NEXT: 1 2 0.50 add z0.d, z0.d, z0.d
+# CHECK-NEXT: 1 2 0.50 add z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: 1 2 0.50 add z0.h, z0.h, #0
+# CHECK-NEXT: 1 2 0.50 add z0.h, z0.h, #0, lsl #8
+# CHECK-NEXT: 1 2 0.50 add z0.h, z0.h, z0.h
+# CHECK-NEXT: 1 2 0.50 add z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: 1 2 0.50 add z0.s, z0.s, #0
+# CHECK-NEXT: 1 2 0.50 add z0.s, z0.s, #0, lsl #8
+# CHECK-NEXT: 1 2 0.50 add z0.s, z0.s, z0.s
+# CHECK-NEXT: 1 2 0.50 add z0.s, z1.s, z2.s
+# CHECK-NEXT: 1 2 0.50 add z21.b, p5/m, z21.b, z10.b
+# CHECK-NEXT: 1 2 0.50 add z21.b, z10.b, z21.b
+# CHECK-NEXT: 1 2 0.50 add z21.d, p5/m, z21.d, z10.d
+# CHECK-NEXT: 1 2 0.50 add z21.d, z10.d, z21.d
+# CHECK-NEXT: 1 2 0.50 add z21.h, p5/m, z21.h, z10.h
+# CHECK-NEXT: 1 2 0.50 add z21.h, z10.h, z21.h
+# CHECK-NEXT: 1 2 0.50 add z21.s, p5/m, z21.s, z10.s
+# CHECK-NEXT: 1 2 0.50 add z21.s, z10.s, z21.s
+# CHECK-NEXT: 1 2 0.50 add z23.b, p3/m, z23.b, z13.b
+# CHECK-NEXT: 1 2 0.50 add z23.b, z13.b, z8.b
+# CHECK-NEXT: 1 2 0.50 add z23.d, p3/m, z23.d, z13.d
+# CHECK-NEXT: 1 2 0.50 add z23.d, z13.d, z8.d
+# CHECK-NEXT: 1 2 0.50 add z23.h, p3/m, z23.h, z13.h
+# CHECK-NEXT: 1 2 0.50 add z23.h, z13.h, z8.h
+# CHECK-NEXT: 1 2 0.50 add z23.s, p3/m, z23.s, z13.s
+# CHECK-NEXT: 1 2 0.50 add z23.s, z13.s, z8.s
+# CHECK-NEXT: 1 2 0.50 add z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: 1 2 0.50 add z31.b, z31.b, #255
+# CHECK-NEXT: 1 2 0.50 add z31.b, z31.b, z31.b
+# CHECK-NEXT: 1 2 0.50 add z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: 1 2 0.50 add z31.d, z31.d, #65280
+# CHECK-NEXT: 1 2 0.50 add z31.d, z31.d, z31.d
+# CHECK-NEXT: 1 2 0.50 add z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: 1 2 0.50 add z31.h, z31.h, #65280
+# CHECK-NEXT: 1 2 0.50 add z31.h, z31.h, z31.h
+# CHECK-NEXT: 1 2 0.50 add z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.50 add z31.s, z31.s, #65280
+# CHECK-NEXT: 1 2 0.50 add z31.s, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.50 addhnb z0.b, z1.h, z31.h
+# CHECK-NEXT: 1 2 0.50 addhnb z0.h, z1.s, z31.s
+# CHECK-NEXT: 1 2 0.50 addhnb z0.s, z1.d, z31.d
+# CHECK-NEXT: 1 2 0.50 addhnt z0.b, z1.h, z31.h
+# CHECK-NEXT: 1 2 0.50 addhnt z0.h, z1.s, z31.s
+# CHECK-NEXT: 1 2 0.50 addhnt z0.s, z1.d, z31.d
+# CHECK-NEXT: 1 2 0.50 addp z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 2 0.50 addp z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.50 addp z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.50 addp z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 2 0.50 addpl sp, sp, #31
+# CHECK-NEXT: 1 2 0.50 addpl x0, x0, #-32
+# CHECK-NEXT: 1 2 0.50 addpl x21, x21, #0
+# CHECK-NEXT: 1 2 0.50 addpl x23, x8, #-1
+# CHECK-NEXT: 1 2 0.50 addvl sp, sp, #31
+# CHECK-NEXT: 1 2 0.50 addvl x0, x0, #-32
+# CHECK-NEXT: 1 2 0.50 addvl x21, x21, #0
+# CHECK-NEXT: 1 2 0.50 addvl x23, x8, #-1
+# CHECK-NEXT: 1 2 0.50 adr z0.d, [z0.d, z0.d, lsl #1]
+# CHECK-NEXT: 1 2 0.50 adr z0.d, [z0.d, z0.d, lsl #2]
+# CHECK-NEXT: 1 2 0.50 adr z0.d, [z0.d, z0.d, lsl #3]
+# CHECK-NEXT: 1 2 0.50 adr z0.d, [z0.d, z0.d, sxtw #1]
+# CHECK-NEXT: 1 2 0.50 adr z0.d, [z0.d, z0.d, sxtw #2]
+# CHECK-NEXT: 1 2 0.50 adr z0.d, [z0.d, z0.d, sxtw #3]
+# CHECK-NEXT: 1 2 0.50 adr z0.d, [z0.d, z0.d, sxtw]
+# CHECK-NEXT: 1 2 0.50 adr z0.d, [z0.d, z0.d, uxtw #1]
+# CHECK-NEXT: 1 2 0.50 adr z0.d, [z0.d, z0.d, uxtw #2]
+# CHECK-NEXT: 1 2 0.50 adr z0.d, [z0.d, z0.d, uxtw #3]
+# CHECK-NEXT: 1 2 0.50 adr z0.d, [z0.d, z0.d, uxtw]
+# CHECK-NEXT: 1 2 0.50 adr z0.d, [z0.d, z0.d]
+# CHECK-NEXT: 1 2 0.50 adr z0.s, [z0.s, z0.s, lsl #1]
+# CHECK-NEXT: 1 2 0.50 adr z0.s, [z0.s, z0.s, lsl #2]
+# CHECK-NEXT: 1 2 0.50 adr z0.s, [z0.s, z0.s, lsl #3]
+# CHECK-NEXT: 1 2 0.50 adr z0.s, [z0.s, z0.s]
+# CHECK-NEXT: 1 2 0.50 aesd z0.b, z0.b, z31.b
+# CHECK-NEXT: 1 2 0.50 aese z0.b, z0.b, z31.b
+# CHECK-NEXT: 1 2 0.50 aesimc z0.b, z0.b
+# CHECK-NEXT: 1 2 0.50 aesimc z31.b, z31.b
+# CHECK-NEXT: 1 2 0.50 aesmc z0.b, z0.b
+# CHECK-NEXT: 1 2 0.50 aesmc z31.b, z31.b
+# CHECK-NEXT: 1 2 0.50 and p0.b, p0/z, p0.b, p1.b
+# CHECK-NEXT: 1 2 0.50 and z0.d, z0.d, #0x6
+# CHECK-NEXT: 1 2 0.50 and z0.d, z0.d, #0xfffffffffffffff9
+# CHECK-NEXT: 1 2 0.50 and z0.d, z0.d, z0.d
+# CHECK-NEXT: 1 2 0.50 and z0.s, z0.s, #0x6
+# CHECK-NEXT: 1 2 0.50 and z0.s, z0.s, #0xfffffff9
+# CHECK-NEXT: 1 2 0.50 and z23.d, z13.d, z8.d
+# CHECK-NEXT: 1 2 0.50 and z23.h, z23.h, #0x6
+# CHECK-NEXT: 1 2 0.50 and z23.h, z23.h, #0xfff9
+# CHECK-NEXT: 1 2 0.50 and z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: 1 2 0.50 and z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: 1 2 0.50 and z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: 1 2 0.50 and z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.50 and z5.b, z5.b, #0x6
+# CHECK-NEXT: 1 2 0.50 and z5.b, z5.b, #0xf9
+# CHECK-NEXT: 1 2 0.50 ands p0.b, p0/z, p0.b, p1.b
+# CHECK-NEXT: 2 6 1.00 andv b0, p7, z31.b
+# CHECK-NEXT: 2 6 1.00 andv d0, p7, z31.d
+# CHECK-NEXT: 2 6 1.00 andv h0, p7, z31.h
+# CHECK-NEXT: 2 6 1.00 andv s0, p7, z31.s
+# CHECK-NEXT: 1 2 1.00 asr z0.b, p0/m, z0.b, #1
+# CHECK-NEXT: 1 2 1.00 asr z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: 1 2 1.00 asr z0.b, p0/m, z0.b, z1.d
+# CHECK-NEXT: 1 2 1.00 asr z0.b, z0.b, #1
+# CHECK-NEXT: 1 2 1.00 asr z0.b, z1.b, z2.d
+# CHECK-NEXT: 1 2 1.00 asr z0.d, p0/m, z0.d, #1
+# CHECK-NEXT: 1 2 1.00 asr z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: 1 2 1.00 asr z0.d, z0.d, #1
+# CHECK-NEXT: 1 2 1.00 asr z0.h, p0/m, z0.h, #1
+# CHECK-NEXT: 1 2 1.00 asr z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: 1 2 1.00 asr z0.h, p0/m, z0.h, z1.d
+# CHECK-NEXT: 1 2 1.00 asr z0.h, z0.h, #1
+# CHECK-NEXT: 1 2 1.00 asr z0.h, z1.h, z2.d
+# CHECK-NEXT: 1 2 1.00 asr z0.s, p0/m, z0.s, #1
+# CHECK-NEXT: 1 2 1.00 asr z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: 1 2 1.00 asr z0.s, p0/m, z0.s, z1.d
+# CHECK-NEXT: 1 2 1.00 asr z0.s, z0.s, #1
+# CHECK-NEXT: 1 2 1.00 asr z0.s, z1.s, z2.d
+# CHECK-NEXT: 1 2 1.00 asr z31.b, p0/m, z31.b, #8
+# CHECK-NEXT: 1 2 1.00 asr z31.b, z31.b, #8
+# CHECK-NEXT: 1 2 1.00 asr z31.d, p0/m, z31.d, #64
+# CHECK-NEXT: 1 2 1.00 asr z31.d, z31.d, #64
+# CHECK-NEXT: 1 2 1.00 asr z31.h, p0/m, z31.h, #16
+# CHECK-NEXT: 1 2 1.00 asr z31.h, z31.h, #16
+# CHECK-NEXT: 1 2 1.00 asr z31.s, p0/m, z31.s, #32
+# CHECK-NEXT: 1 2 1.00 asr z31.s, z31.s, #32
+# CHECK-NEXT: 1 4 0.50 asrd z0.b, p0/m, z0.b, #1
+# CHECK-NEXT: 1 4 0.50 asrd z0.d, p0/m, z0.d, #1
+# CHECK-NEXT: 1 4 0.50 asrd z0.h, p0/m, z0.h, #1
+# CHECK-NEXT: 1 4 0.50 asrd z0.s, p0/m, z0.s, #1
+# CHECK-NEXT: 1 4 0.50 asrd z31.b, p0/m, z31.b, #8
+# CHECK-NEXT: 1 4 0.50 asrd z31.d, p0/m, z31.d, #64
+# CHECK-NEXT: 1 4 0.50 asrd z31.h, p0/m, z31.h, #16
+# CHECK-NEXT: 1 4 0.50 asrd z31.s, p0/m, z31.s, #32
+# CHECK-NEXT: 1 2 1.00 asrr z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: 1 2 1.00 asrr z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: 1 2 1.00 asrr z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: 1 2 1.00 asrr z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: 1 2 0.50 bcax z29.d, z29.d, z30.d, z31.d
+# CHECK-NEXT: 2 6 2.00 bdep z0.b, z1.b, z31.b
+# CHECK-NEXT: 2 6 2.00 bdep z0.d, z1.d, z31.d
+# CHECK-NEXT: 2 6 2.00 bdep z0.h, z1.h, z31.h
+# CHECK-NEXT: 2 6 2.00 bdep z0.s, z1.s, z31.s
+# CHECK-NEXT: 2 6 2.00 bext z0.b, z1.b, z31.b
+# CHECK-NEXT: 2 6 2.00 bext z0.d, z1.d, z31.d
+# CHECK-NEXT: 2 6 2.00 bext z0.h, z1.h, z31.h
+# CHECK-NEXT: 2 6 2.00 bext z0.s, z1.s, z31.s
+# CHECK-NEXT: 1 4 0.50 bfcvt z0.h, p0/m, z1.s
+# CHECK-NEXT: 1 4 0.50 bfcvtnt z0.h, p0/m, z1.s
+# CHECK-NEXT: 1 5 0.50 bfdot z0.s, z1.h, z2.h
+# CHECK-NEXT: 1 5 0.50 bfdot z0.s, z1.h, z2.h[0]
+# CHECK-NEXT: 1 5 0.50 bfdot z0.s, z1.h, z2.h[3]
+# CHECK-NEXT: 1 5 0.50 bfmlalb z0.s, z1.h, z2.h
+# CHECK-NEXT: 1 5 0.50 bfmlalb z0.s, z1.h, z2.h[0]
+# CHECK-NEXT: 1 5 0.50 bfmlalb z0.s, z1.h, z2.h[7]
+# CHECK-NEXT: 1 5 0.50 bfmlalb z10.s, z21.h, z14.h
+# CHECK-NEXT: 1 5 0.50 bfmlalb z21.s, z14.h, z3.h[2]
+# CHECK-NEXT: 1 5 0.50 bfmlalt z0.s, z1.h, z2.h
+# CHECK-NEXT: 1 5 0.50 bfmlalt z0.s, z1.h, z2.h[0]
+# CHECK-NEXT: 1 5 0.50 bfmlalt z0.s, z1.h, z2.h[7]
+# CHECK-NEXT: 1 5 0.50 bfmlalt z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: 1 5 0.50 bfmlalt z14.s, z10.h, z21.h
+# CHECK-NEXT: 1 6 0.50 bfmmla z0.s, z1.h, z2.h
+# CHECK-NEXT: 2 6 2.00 bgrp z0.b, z1.b, z31.b
+# CHECK-NEXT: 2 6 2.00 bgrp z0.d, z1.d, z31.d
+# CHECK-NEXT: 2 6 2.00 bgrp z0.h, z1.h, z31.h
+# CHECK-NEXT: 2 6 2.00 bgrp z0.s, z1.s, z31.s
+# CHECK-NEXT: 1 2 0.50 bic p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: 1 2 0.50 bic p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 1 2 0.50 bic z0.d, z0.d, z0.d
+# CHECK-NEXT: 1 2 0.50 bic z23.d, z13.d, z8.d
+# CHECK-NEXT: 1 2 0.50 bic z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: 1 2 0.50 bic z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: 1 2 0.50 bic z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: 1 2 0.50 bic z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.50 bics p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: 1 2 0.50 bics p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 1 2 0.50 brka p0.b, p15/m, p15.b
+# CHECK-NEXT: 1 2 0.50 brka p0.b, p15/z, p15.b
+# CHECK-NEXT: 1 2 0.50 brkas p0.b, p15/z, p15.b
+# CHECK-NEXT: 1 2 0.50 brkb p0.b, p15/m, p15.b
+# CHECK-NEXT: 1 2 0.50 brkb p0.b, p15/z, p15.b
+# CHECK-NEXT: 1 2 0.50 brkbs p0.b, p15/z, p15.b
+# CHECK-NEXT: 1 2 0.50 brkn p0.b, p15/z, p1.b, p0.b
+# CHECK-NEXT: 1 3 0.50 brkn p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 1 2 0.50 brkns p0.b, p15/z, p1.b, p0.b
+# CHECK-NEXT: 1 3 0.50 brkns p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 1 2 0.50 brkpa p0.b, p15/z, p1.b, p2.b
+# CHECK-NEXT: 1 3 0.50 brkpa p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 1 2 0.50 brkpas p0.b, p15/z, p1.b, p2.b
+# CHECK-NEXT: 1 3 0.50 brkpas p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 1 2 0.50 brkpb p0.b, p15/z, p1.b, p2.b
+# CHECK-NEXT: 1 3 0.50 brkpb p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 1 2 0.50 brkpbs p0.b, p15/z, p1.b, p2.b
+# CHECK-NEXT: 1 3 0.50 brkpbs p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 1 2 0.50 bsl z0.d, z0.d, z1.d, z2.d
+# CHECK-NEXT: 1 2 0.50 bsl1n z0.d, z0.d, z1.d, z2.d
+# CHECK-NEXT: 1 2 0.50 bsl2n z0.d, z0.d, z1.d, z2.d
+# CHECK-NEXT: 1 2 0.50 cadd z0.b, z0.b, z0.b, #90
+# CHECK-NEXT: 1 2 0.50 cadd z0.d, z0.d, z0.d, #90
+# CHECK-NEXT: 1 2 0.50 cadd z0.h, z0.h, z0.h, #90
+# CHECK-NEXT: 1 2 0.50 cadd z0.s, z0.s, z0.s, #90
+# CHECK-NEXT: 1 2 0.50 cadd z31.b, z31.b, z31.b, #270
+# CHECK-NEXT: 1 2 0.50 cadd z31.d, z31.d, z31.d, #270
+# CHECK-NEXT: 1 2 0.50 cadd z31.h, z31.h, z31.h, #270
+# CHECK-NEXT: 1 2 0.50 cadd z31.s, z31.s, z31.s, #270
+# CHECK-NEXT: 1 3 1.00 cdot z0.d, z1.h, z15.h[1], #0
+# CHECK-NEXT: 1 3 1.00 cdot z0.d, z1.h, z31.h, #0
+# CHECK-NEXT: 1 3 1.00 cdot z0.d, z1.h, z31.h, #180
+# CHECK-NEXT: 1 3 1.00 cdot z0.d, z1.h, z31.h, #270
+# CHECK-NEXT: 1 3 1.00 cdot z0.d, z1.h, z31.h, #90
+# CHECK-NEXT: 1 3 0.50 cdot z0.s, z1.b, z31.b, #0
+# CHECK-NEXT: 1 3 0.50 cdot z0.s, z1.b, z7.b[3], #0
+# CHECK-NEXT: 1 3 1.00 cdot z29.d, z30.h, z0.h[0], #180
+# CHECK-NEXT: 1 3 1.00 cdot z31.d, z30.h, z7.h[1], #270
+# CHECK-NEXT: 1 3 1.00 cdot z5.d, z6.h, z3.h[0], #90
+# CHECK-NEXT: 1 3 1.00 clasta b0, p7, b0, z31.b
+# CHECK-NEXT: 1 3 1.00 clasta d0, p7, d0, z31.d
+# CHECK-NEXT: 1 3 1.00 clasta h0, p7, h0, z31.h
+# CHECK-NEXT: 1 3 1.00 clasta s0, p7, s0, z31.s
+# CHECK-NEXT: 2 8 1.00 clasta w0, p7, w0, z31.b
+# CHECK-NEXT: 2 8 1.00 clasta w0, p7, w0, z31.h
+# CHECK-NEXT: 2 8 1.00 clasta w0, p7, w0, z31.s
+# CHECK-NEXT: 2 8 1.00 clasta x0, p7, x0, z31.d
+# CHECK-NEXT: 1 3 1.00 clasta z0.b, p7, z0.b, z31.b
+# CHECK-NEXT: 1 3 1.00 clasta z0.d, p7, z0.d, z31.d
+# CHECK-NEXT: 1 3 1.00 clasta z0.h, p7, z0.h, z31.h
+# CHECK-NEXT: 1 3 1.00 clasta z0.s, p7, z0.s, z31.s
+# CHECK-NEXT: 1 3 1.00 clastb b0, p7, b0, z31.b
+# CHECK-NEXT: 1 3 1.00 clastb d0, p7, d0, z31.d
+# CHECK-NEXT: 1 3 1.00 clastb h0, p7, h0, z31.h
+# CHECK-NEXT: 1 3 1.00 clastb s0, p7, s0, z31.s
+# CHECK-NEXT: 2 8 1.00 clastb w0, p7, w0, z31.b
+# CHECK-NEXT: 2 8 1.00 clastb w0, p7, w0, z31.h
+# CHECK-NEXT: 2 8 1.00 clastb w0, p7, w0, z31.s
+# CHECK-NEXT: 2 8 1.00 clastb x0, p7, x0, z31.d
+# CHECK-NEXT: 1 3 1.00 clastb z0.b, p7, z0.b, z31.b
+# CHECK-NEXT: 1 3 1.00 clastb z0.d, p7, z0.d, z31.d
+# CHECK-NEXT: 1 3 1.00 clastb z0.h, p7, z0.h, z31.h
+# CHECK-NEXT: 1 3 1.00 clastb z0.s, p7, z0.s, z31.s
+# CHECK-NEXT: 1 2 0.50 cls z31.b, p7/m, z31.b
+# CHECK-NEXT: 1 2 0.50 cls z31.d, p7/m, z31.d
+# CHECK-NEXT: 1 2 0.50 cls z31.h, p7/m, z31.h
+# CHECK-NEXT: 1 2 0.50 cls z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 2 0.50 clz z31.b, p7/m, z31.b
+# CHECK-NEXT: 1 2 0.50 clz z31.d, p7/m, z31.d
+# CHECK-NEXT: 1 2 0.50 clz z31.h, p7/m, z31.h
+# CHECK-NEXT: 1 2 0.50 clz z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 4 1.00 cmla z0.b, z1.b, z2.b, #0
+# CHECK-NEXT: 1 5 2.00 cmla z0.d, z1.d, z2.d, #0
+# CHECK-NEXT: 1 4 1.00 cmla z0.h, z1.h, z2.h, #0
+# CHECK-NEXT: 1 4 1.00 cmla z0.h, z1.h, z2.h[0], #0
+# CHECK-NEXT: 1 4 1.00 cmla z0.s, z1.s, z2.s, #0
+# CHECK-NEXT: 1 4 1.00 cmla z0.s, z1.s, z2.s[0], #0
+# CHECK-NEXT: 1 4 1.00 cmla z15.b, z16.b, z17.b, #270
+# CHECK-NEXT: 1 5 2.00 cmla z15.d, z16.d, z17.d, #270
+# CHECK-NEXT: 1 4 1.00 cmla z15.h, z16.h, z17.h, #270
+# CHECK-NEXT: 1 4 1.00 cmla z15.s, z16.s, z17.s, #270
+# CHECK-NEXT: 1 4 1.00 cmla z29.b, z30.b, z31.b, #90
+# CHECK-NEXT: 1 5 2.00 cmla z29.d, z30.d, z31.d, #90
+# CHECK-NEXT: 1 4 1.00 cmla z29.h, z30.h, z31.h, #90
+# CHECK-NEXT: 1 4 1.00 cmla z29.s, z30.s, z31.s, #90
+# CHECK-NEXT: 1 4 1.00 cmla z31.b, z31.b, z31.b, #180
+# CHECK-NEXT: 1 5 2.00 cmla z31.d, z31.d, z31.d, #180
+# CHECK-NEXT: 1 4 1.00 cmla z31.h, z30.h, z7.h[0], #180
+# CHECK-NEXT: 1 4 1.00 cmla z31.h, z31.h, z31.h, #180
+# CHECK-NEXT: 1 4 1.00 cmla z31.s, z30.s, z7.s[0], #180
+# CHECK-NEXT: 1 4 1.00 cmla z31.s, z31.s, z31.s, #180
+# CHECK-NEXT: 1 3 1.00 cmpeq p0.b, p0/z, z0.b, #-16
+# CHECK-NEXT: 1 3 1.00 cmpeq p0.b, p0/z, z0.b, #15
+# CHECK-NEXT: 1 3 1.00 cmpeq p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: 1 3 1.00 cmpeq p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpeq p0.d, p0/z, z0.d, #-16
+# CHECK-NEXT: 1 3 1.00 cmpeq p0.d, p0/z, z0.d, #15
+# CHECK-NEXT: 1 3 1.00 cmpeq p0.d, p0/z, z0.d, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpeq p0.h, p0/z, z0.h, #-16
+# CHECK-NEXT: 1 3 1.00 cmpeq p0.h, p0/z, z0.h, #15
+# CHECK-NEXT: 1 3 1.00 cmpeq p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpeq p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: 1 3 1.00 cmpeq p0.s, p0/z, z0.s, #-16
+# CHECK-NEXT: 1 3 1.00 cmpeq p0.s, p0/z, z0.s, #15
+# CHECK-NEXT: 1 3 1.00 cmpeq p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpeq p0.s, p0/z, z0.s, z0.s
+# CHECK-NEXT: 1 3 1.00 cmpge p0.b, p0/z, z0.b, #-16
+# CHECK-NEXT: 1 3 1.00 cmpge p0.b, p0/z, z0.b, #15
+# CHECK-NEXT: 1 3 1.00 cmpge p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: 1 3 1.00 cmpge p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpge p0.b, p0/z, z1.b, z0.b
+# CHECK-NEXT: 1 3 1.00 cmpge p0.d, p0/z, z0.d, #-16
+# CHECK-NEXT: 1 3 1.00 cmpge p0.d, p0/z, z0.d, #15
+# CHECK-NEXT: 1 3 1.00 cmpge p0.d, p0/z, z0.d, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpge p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpge p0.h, p0/z, z0.h, #-16
+# CHECK-NEXT: 1 3 1.00 cmpge p0.h, p0/z, z0.h, #15
+# CHECK-NEXT: 1 3 1.00 cmpge p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpge p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: 1 3 1.00 cmpge p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: 1 3 1.00 cmpge p0.s, p0/z, z0.s, #-16
+# CHECK-NEXT: 1 3 1.00 cmpge p0.s, p0/z, z0.s, #15
+# CHECK-NEXT: 1 3 1.00 cmpge p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpge p0.s, p0/z, z0.s, z0.s
+# CHECK-NEXT: 1 3 1.00 cmpge p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: 1 3 1.00 cmpgt p0.b, p0/z, z0.b, #-16
+# CHECK-NEXT: 1 3 1.00 cmpgt p0.b, p0/z, z0.b, #15
+# CHECK-NEXT: 1 3 1.00 cmpgt p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: 1 3 1.00 cmpgt p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpgt p0.b, p0/z, z1.b, z0.b
+# CHECK-NEXT: 1 3 1.00 cmpgt p0.d, p0/z, z0.d, #-16
+# CHECK-NEXT: 1 3 1.00 cmpgt p0.d, p0/z, z0.d, #15
+# CHECK-NEXT: 1 3 1.00 cmpgt p0.d, p0/z, z0.d, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpgt p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpgt p0.h, p0/z, z0.h, #-16
+# CHECK-NEXT: 1 3 1.00 cmpgt p0.h, p0/z, z0.h, #15
+# CHECK-NEXT: 1 3 1.00 cmpgt p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpgt p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: 1 3 1.00 cmpgt p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: 1 3 1.00 cmpgt p0.s, p0/z, z0.s, #-16
+# CHECK-NEXT: 1 3 1.00 cmpgt p0.s, p0/z, z0.s, #15
+# CHECK-NEXT: 1 3 1.00 cmpgt p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpgt p0.s, p0/z, z0.s, z0.s
+# CHECK-NEXT: 1 3 1.00 cmpgt p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: 1 3 1.00 cmphi p0.b, p0/z, z0.b, #0
+# CHECK-NEXT: 1 3 1.00 cmphi p0.b, p0/z, z0.b, #127
+# CHECK-NEXT: 1 3 1.00 cmphi p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: 1 3 1.00 cmphi p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: 1 3 1.00 cmphi p0.b, p0/z, z1.b, z0.b
+# CHECK-NEXT: 1 3 1.00 cmphi p0.d, p0/z, z0.d, #0
+# CHECK-NEXT: 1 3 1.00 cmphi p0.d, p0/z, z0.d, #127
+# CHECK-NEXT: 1 3 1.00 cmphi p0.d, p0/z, z0.d, z0.d
+# CHECK-NEXT: 1 3 1.00 cmphi p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: 1 3 1.00 cmphi p0.h, p0/z, z0.h, #0
+# CHECK-NEXT: 1 3 1.00 cmphi p0.h, p0/z, z0.h, #127
+# CHECK-NEXT: 1 3 1.00 cmphi p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: 1 3 1.00 cmphi p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: 1 3 1.00 cmphi p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: 1 3 1.00 cmphi p0.s, p0/z, z0.s, #0
+# CHECK-NEXT: 1 3 1.00 cmphi p0.s, p0/z, z0.s, #127
+# CHECK-NEXT: 1 3 1.00 cmphi p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: 1 3 1.00 cmphi p0.s, p0/z, z0.s, z0.s
+# CHECK-NEXT: 1 3 1.00 cmphi p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: 1 3 1.00 cmphs p0.b, p0/z, z0.b, #0
+# CHECK-NEXT: 1 3 1.00 cmphs p0.b, p0/z, z0.b, #127
+# CHECK-NEXT: 1 3 1.00 cmphs p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: 1 3 1.00 cmphs p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: 1 3 1.00 cmphs p0.b, p0/z, z1.b, z0.b
+# CHECK-NEXT: 1 3 1.00 cmphs p0.d, p0/z, z0.d, #0
+# CHECK-NEXT: 1 3 1.00 cmphs p0.d, p0/z, z0.d, #127
+# CHECK-NEXT: 1 3 1.00 cmphs p0.d, p0/z, z0.d, z0.d
+# CHECK-NEXT: 1 3 1.00 cmphs p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: 1 3 1.00 cmphs p0.h, p0/z, z0.h, #0
+# CHECK-NEXT: 1 3 1.00 cmphs p0.h, p0/z, z0.h, #127
+# CHECK-NEXT: 1 3 1.00 cmphs p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: 1 3 1.00 cmphs p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: 1 3 1.00 cmphs p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: 1 3 1.00 cmphs p0.s, p0/z, z0.s, #0
+# CHECK-NEXT: 1 3 1.00 cmphs p0.s, p0/z, z0.s, #127
+# CHECK-NEXT: 1 3 1.00 cmphs p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: 1 3 1.00 cmphs p0.s, p0/z, z0.s, z0.s
+# CHECK-NEXT: 1 3 1.00 cmphs p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: 1 3 1.00 cmple p0.b, p0/z, z0.b, #-16
+# CHECK-NEXT: 1 3 1.00 cmple p0.b, p0/z, z0.b, #15
+# CHECK-NEXT: 1 3 1.00 cmple p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: 1 3 1.00 cmple p0.d, p0/z, z0.d, #-16
+# CHECK-NEXT: 1 3 1.00 cmple p0.d, p0/z, z0.d, #15
+# CHECK-NEXT: 1 3 1.00 cmple p0.h, p0/z, z0.h, #-16
+# CHECK-NEXT: 1 3 1.00 cmple p0.h, p0/z, z0.h, #15
+# CHECK-NEXT: 1 3 1.00 cmple p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: 1 3 1.00 cmple p0.s, p0/z, z0.s, #-16
+# CHECK-NEXT: 1 3 1.00 cmple p0.s, p0/z, z0.s, #15
+# CHECK-NEXT: 1 3 1.00 cmple p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: 1 3 1.00 cmplo p0.b, p0/z, z0.b, #0
+# CHECK-NEXT: 1 3 1.00 cmplo p0.b, p0/z, z0.b, #127
+# CHECK-NEXT: 1 3 1.00 cmplo p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: 1 3 1.00 cmplo p0.d, p0/z, z0.d, #0
+# CHECK-NEXT: 1 3 1.00 cmplo p0.d, p0/z, z0.d, #127
+# CHECK-NEXT: 1 3 1.00 cmplo p0.h, p0/z, z0.h, #0
+# CHECK-NEXT: 1 3 1.00 cmplo p0.h, p0/z, z0.h, #127
+# CHECK-NEXT: 1 3 1.00 cmplo p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: 1 3 1.00 cmplo p0.s, p0/z, z0.s, #0
+# CHECK-NEXT: 1 3 1.00 cmplo p0.s, p0/z, z0.s, #127
+# CHECK-NEXT: 1 3 1.00 cmplo p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpls p0.b, p0/z, z0.b, #0
+# CHECK-NEXT: 1 3 1.00 cmpls p0.b, p0/z, z0.b, #127
+# CHECK-NEXT: 1 3 1.00 cmpls p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpls p0.d, p0/z, z0.d, #0
+# CHECK-NEXT: 1 3 1.00 cmpls p0.d, p0/z, z0.d, #127
+# CHECK-NEXT: 1 3 1.00 cmpls p0.h, p0/z, z0.h, #0
+# CHECK-NEXT: 1 3 1.00 cmpls p0.h, p0/z, z0.h, #127
+# CHECK-NEXT: 1 3 1.00 cmpls p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpls p0.s, p0/z, z0.s, #0
+# CHECK-NEXT: 1 3 1.00 cmpls p0.s, p0/z, z0.s, #127
+# CHECK-NEXT: 1 3 1.00 cmpls p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: 1 3 1.00 cmplt p0.b, p0/z, z0.b, #-16
+# CHECK-NEXT: 1 3 1.00 cmplt p0.b, p0/z, z0.b, #15
+# CHECK-NEXT: 1 3 1.00 cmplt p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: 1 3 1.00 cmplt p0.d, p0/z, z0.d, #-16
+# CHECK-NEXT: 1 3 1.00 cmplt p0.d, p0/z, z0.d, #15
+# CHECK-NEXT: 1 3 1.00 cmplt p0.h, p0/z, z0.h, #-16
+# CHECK-NEXT: 1 3 1.00 cmplt p0.h, p0/z, z0.h, #15
+# CHECK-NEXT: 1 3 1.00 cmplt p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: 1 3 1.00 cmplt p0.s, p0/z, z0.s, #-16
+# CHECK-NEXT: 1 3 1.00 cmplt p0.s, p0/z, z0.s, #15
+# CHECK-NEXT: 1 3 1.00 cmplt p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpne p0.b, p0/z, z0.b, #-16
+# CHECK-NEXT: 1 3 1.00 cmpne p0.b, p0/z, z0.b, #15
+# CHECK-NEXT: 1 3 1.00 cmpne p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: 1 3 1.00 cmpne p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpne p0.d, p0/z, z0.d, #-16
+# CHECK-NEXT: 1 3 1.00 cmpne p0.d, p0/z, z0.d, #15
+# CHECK-NEXT: 1 3 1.00 cmpne p0.d, p0/z, z0.d, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpne p0.h, p0/z, z0.h, #-16
+# CHECK-NEXT: 1 3 1.00 cmpne p0.h, p0/z, z0.h, #15
+# CHECK-NEXT: 1 3 1.00 cmpne p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpne p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: 1 3 1.00 cmpne p0.s, p0/z, z0.s, #-16
+# CHECK-NEXT: 1 3 1.00 cmpne p0.s, p0/z, z0.s, #15
+# CHECK-NEXT: 1 3 1.00 cmpne p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: 1 3 1.00 cmpne p0.s, p0/z, z0.s, z0.s
+# CHECK-NEXT: 1 2 0.50 cnot z31.b, p7/m, z31.b
+# CHECK-NEXT: 1 2 0.50 cnot z31.d, p7/m, z31.d
+# CHECK-NEXT: 1 2 0.50 cnot z31.h, p7/m, z31.h
+# CHECK-NEXT: 1 2 0.50 cnot z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 2 0.50 cnt z31.b, p7/m, z31.b
+# CHECK-NEXT: 1 2 0.50 cnt z31.d, p7/m, z31.d
+# CHECK-NEXT: 1 2 0.50 cnt z31.h, p7/m, z31.h
+# CHECK-NEXT: 1 2 0.50 cnt z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 2 0.50 cntb x0
+# CHECK-NEXT: 1 2 0.50 cntb x0, #28
+# CHECK-NEXT: 1 2 0.50 cntb x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 cntb x0, pow2
+# CHECK-NEXT: 1 2 0.50 cntd x0
+# CHECK-NEXT: 1 2 0.50 cntd x0, #28
+# CHECK-NEXT: 1 2 0.50 cntd x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 cntd x0, pow2
+# CHECK-NEXT: 1 2 0.50 cnth x0
+# CHECK-NEXT: 1 2 0.50 cnth x0, #28
+# CHECK-NEXT: 1 2 0.50 cnth x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 cnth x0, pow2
+# CHECK-NEXT: 1 2 0.50 cntp x0, p15, p0.b
+# CHECK-NEXT: 1 2 0.50 cntp x0, p15, p0.d
+# CHECK-NEXT: 1 2 0.50 cntp x0, p15, p0.h
+# CHECK-NEXT: 1 2 0.50 cntp x0, p15, p0.s
+# CHECK-NEXT: 1 2 0.50 cntw x0
+# CHECK-NEXT: 1 2 0.50 cntw x0, #28
+# CHECK-NEXT: 1 2 0.50 cntw x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 cntw x0, pow2
+# CHECK-NEXT: 1 3 1.00 compact z31.d, p7, z31.d
+# CHECK-NEXT: 1 3 1.00 compact z31.s, p7, z31.s
+# CHECK-NEXT: 2 1 1.00 ctermeq w30, wzr
+# CHECK-NEXT: 2 1 1.00 ctermeq wzr, w30
+# CHECK-NEXT: 2 1 1.00 ctermeq x30, xzr
+# CHECK-NEXT: 2 1 1.00 ctermeq xzr, x30
+# CHECK-NEXT: 2 1 1.00 ctermne w30, wzr
+# CHECK-NEXT: 2 1 1.00 ctermne wzr, w30
+# CHECK-NEXT: 2 1 1.00 ctermne x30, xzr
+# CHECK-NEXT: 2 1 1.00 ctermne xzr, x30
+# CHECK-NEXT: 1 1 0.13 decb x0
+# CHECK-NEXT: 1 2 0.50 decb x0, #14
+# CHECK-NEXT: 1 2 0.50 decb x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 decb x0, pow2
+# CHECK-NEXT: 1 2 0.50 decb x0, vl1
+# CHECK-NEXT: 1 1 0.13 decd x0
+# CHECK-NEXT: 1 2 0.50 decd x0, #14
+# CHECK-NEXT: 1 2 0.50 decd x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 decd x0, pow2
+# CHECK-NEXT: 1 2 0.50 decd x0, vl1
+# CHECK-NEXT: 1 1 0.13 dech x0
+# CHECK-NEXT: 1 2 0.50 dech x0, #14
+# CHECK-NEXT: 1 2 0.50 dech x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 dech x0, pow2
+# CHECK-NEXT: 1 2 0.50 dech x0, vl1
+# CHECK-NEXT: 1 2 0.50 decp x0, p0.b
+# CHECK-NEXT: 1 2 0.50 decp x0, p0.d
+# CHECK-NEXT: 1 2 0.50 decp x0, p0.h
+# CHECK-NEXT: 1 2 0.50 decp x0, p0.s
+# CHECK-NEXT: 1 2 0.50 decp xzr, p15.b
+# CHECK-NEXT: 1 2 0.50 decp xzr, p15.d
+# CHECK-NEXT: 1 2 0.50 decp xzr, p15.h
+# CHECK-NEXT: 1 2 0.50 decp xzr, p15.s
+# CHECK-NEXT: 3 7 1.00 decp z31.d, p15.d
+# CHECK-NEXT: 3 7 1.00 decp z31.h, p15.h
+# CHECK-NEXT: 3 7 1.00 decp z31.s, p15.s
+# CHECK-NEXT: 1 1 0.13 decw x0
+# CHECK-NEXT: 1 2 0.50 decw x0, #14
+# CHECK-NEXT: 1 2 0.50 decw x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 decw x0, pow2
+# CHECK-NEXT: 1 2 0.50 decw x0, vl1
+# CHECK-NEXT: 1 2 0.50 dupm z0.d, #0xfffffffffffffff9
+# CHECK-NEXT: 1 2 0.50 dupm z0.s, #0xfffffff9
+# CHECK-NEXT: 1 2 0.50 dupm z23.h, #0xfff9
+# CHECK-NEXT: 1 2 0.50 dupm z5.b, #0xf9
+# CHECK-NEXT: 1 2 0.50 eor p0.b, p0/z, p0.b, p1.b
+# CHECK-NEXT: 1 2 0.50 eor z0.d, z0.d, #0x6
+# CHECK-NEXT: 1 2 0.50 eor z0.d, z0.d, #0xfffffffffffffff9
+# CHECK-NEXT: 1 2 0.50 eor z0.d, z0.d, z0.d
+# CHECK-NEXT: 1 2 0.50 eor z0.s, z0.s, #0x6
+# CHECK-NEXT: 1 2 0.50 eor z0.s, z0.s, #0xfffffff9
+# CHECK-NEXT: 1 2 0.50 eor z23.d, z13.d, z8.d
+# CHECK-NEXT: 1 2 0.50 eor z23.h, z23.h, #0x6
+# CHECK-NEXT: 1 2 0.50 eor z23.h, z23.h, #0xfff9
+# CHECK-NEXT: 1 2 0.50 eor z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: 1 2 0.50 eor z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: 1 2 0.50 eor z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: 1 2 0.50 eor z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.50 eor z5.b, z5.b, #0x6
+# CHECK-NEXT: 1 2 0.50 eor z5.b, z5.b, #0xf9
+# CHECK-NEXT: 1 2 0.50 eor3 z29.d, z29.d, z30.d, z31.d
+# CHECK-NEXT: 1 2 0.50 eorbt z0.b, z1.b, z31.b
+# CHECK-NEXT: 1 2 0.50 eorbt z0.d, z1.d, z31.d
+# CHECK-NEXT: 1 2 0.50 eorbt z0.h, z1.h, z31.h
+# CHECK-NEXT: 1 2 0.50 eorbt z0.s, z1.s, z31.s
+# CHECK-NEXT: 1 2 0.50 eors p0.b, p0/z, p0.b, p1.b
+# CHECK-NEXT: 1 2 0.50 eortb z0.b, z1.b, z31.b
+# CHECK-NEXT: 1 2 0.50 eortb z0.d, z1.d, z31.d
+# CHECK-NEXT: 1 2 0.50 eortb z0.h, z1.h, z31.h
+# CHECK-NEXT: 1 2 0.50 eortb z0.s, z1.s, z31.s
+# CHECK-NEXT: 2 6 1.00 eorv b0, p7, z31.b
+# CHECK-NEXT: 2 6 1.00 eorv d0, p7, z31.d
+# CHECK-NEXT: 2 6 1.00 eorv h0, p7, z31.h
+# CHECK-NEXT: 2 6 1.00 eorv s0, p7, z31.s
+# CHECK-NEXT: 1 2 0.50 ext z0.b, { z1.b, z2.b }, #0
+# CHECK-NEXT: 1 2 0.50 ext z31.b, z31.b, z0.b, #0
+# CHECK-NEXT: 1 2 0.50 ext z31.b, z31.b, z0.b, #255
+# CHECK-NEXT: 1 2 0.50 ext z31.b, { z30.b, z31.b }, #255
+# CHECK-NEXT: 1 2 0.50 fabd z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 2 0.50 fabd z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 1 2 0.50 fabd z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 2 0.50 fabs z31.d, p7/m, z31.d
+# CHECK-NEXT: 1 2 0.50 fabs z31.h, p7/m, z31.h
+# CHECK-NEXT: 1 2 0.50 fabs z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 2 1.00 facge p0.d, p0/z, z0.d, z1.d
+# CHECK-NEXT: 1 2 1.00 facge p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: 1 2 1.00 facge p0.h, p0/z, z0.h, z1.h
+# CHECK-NEXT: 1 2 1.00 facge p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: 1 2 1.00 facge p0.s, p0/z, z0.s, z1.s
+# CHECK-NEXT: 1 2 1.00 facge p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: 1 2 1.00 facgt p0.d, p0/z, z0.d, z1.d
+# CHECK-NEXT: 1 2 1.00 facgt p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: 1 2 1.00 facgt p0.h, p0/z, z0.h, z1.h
+# CHECK-NEXT: 1 2 1.00 facgt p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: 1 2 1.00 facgt p0.s, p0/z, z0.s, z1.s
+# CHECK-NEXT: 1 2 1.00 facgt p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: 1 2 0.50 fadd z0.d, p0/m, z0.d, #0.5
+# CHECK-NEXT: 1 2 0.50 fadd z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 2 0.50 fadd z0.d, z1.d, z31.d
+# CHECK-NEXT: 1 2 0.50 fadd z0.h, p0/m, z0.h, #0.5
+# CHECK-NEXT: 1 2 0.50 fadd z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 1 2 0.50 fadd z0.h, z1.h, z31.h
+# CHECK-NEXT: 1 2 0.50 fadd z0.s, p0/m, z0.s, #0.5
+# CHECK-NEXT: 1 2 0.50 fadd z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 2 0.50 fadd z0.s, z1.s, z31.s
+# CHECK-NEXT: 1 2 0.50 fadd z31.d, p7/m, z31.d, #1.0
+# CHECK-NEXT: 1 2 0.50 fadd z31.h, p7/m, z31.h, #1.0
+# CHECK-NEXT: 1 2 0.50 fadd z31.s, p7/m, z31.s, #1.0
+# CHECK-NEXT: 1 4 0.50 fadda d0, p7, d0, z31.d
+# CHECK-NEXT: 1 10 9.00 fadda h0, p7, h0, z31.h
+# CHECK-NEXT: 1 6 5.00 fadda s0, p7, s0, z31.s
+# CHECK-NEXT: 1 2 0.50 faddp z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.50 faddp z29.s, p3/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.50 faddp z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 2 4 1.00 faddv d0, p7, z31.d
+# CHECK-NEXT: 4 8 2.00 faddv h0, p7, z31.h
+# CHECK-NEXT: 3 6 1.50 faddv s0, p7, z31.s
+# CHECK-NEXT: 1 3 0.50 fcadd z0.d, p0/m, z0.d, z0.d, #90
+# CHECK-NEXT: 1 3 0.50 fcadd z0.h, p0/m, z0.h, z0.h, #90
+# CHECK-NEXT: 1 3 0.50 fcadd z0.s, p0/m, z0.s, z0.s, #90
+# CHECK-NEXT: 1 3 0.50 fcadd z31.d, p7/m, z31.d, z31.d, #270
+# CHECK-NEXT: 1 3 0.50 fcadd z31.h, p7/m, z31.h, z31.h, #270
+# CHECK-NEXT: 1 3 0.50 fcadd z31.s, p7/m, z31.s, z31.s, #270
+# CHECK-NEXT: 1 2 1.00 fcmeq p0.d, p0/z, z0.d, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmeq p0.d, p0/z, z0.d, z1.d
+# CHECK-NEXT: 1 2 1.00 fcmeq p0.h, p0/z, z0.h, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmeq p0.h, p0/z, z0.h, z1.h
+# CHECK-NEXT: 1 2 1.00 fcmeq p0.s, p0/z, z0.s, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmeq p0.s, p0/z, z0.s, z1.s
+# CHECK-NEXT: 1 2 1.00 fcmge p0.d, p0/z, z0.d, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmge p0.d, p0/z, z0.d, z1.d
+# CHECK-NEXT: 1 2 1.00 fcmge p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: 1 2 1.00 fcmge p0.h, p0/z, z0.h, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmge p0.h, p0/z, z0.h, z1.h
+# CHECK-NEXT: 1 2 1.00 fcmge p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: 1 2 1.00 fcmge p0.s, p0/z, z0.s, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmge p0.s, p0/z, z0.s, z1.s
+# CHECK-NEXT: 1 2 1.00 fcmge p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: 1 2 1.00 fcmgt p0.d, p0/z, z0.d, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmgt p0.d, p0/z, z0.d, z1.d
+# CHECK-NEXT: 1 2 1.00 fcmgt p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: 1 2 1.00 fcmgt p0.h, p0/z, z0.h, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmgt p0.h, p0/z, z0.h, z1.h
+# CHECK-NEXT: 1 2 1.00 fcmgt p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: 1 2 1.00 fcmgt p0.s, p0/z, z0.s, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmgt p0.s, p0/z, z0.s, z1.s
+# CHECK-NEXT: 1 2 1.00 fcmgt p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: 1 5 0.50 fcmla z0.d, p0/m, z0.d, z0.d, #0
+# CHECK-NEXT: 1 5 0.50 fcmla z0.d, p0/m, z1.d, z2.d, #90
+# CHECK-NEXT: 1 5 0.50 fcmla z0.h, p0/m, z0.h, z0.h, #0
+# CHECK-NEXT: 1 5 0.50 fcmla z0.h, p0/m, z1.h, z2.h, #90
+# CHECK-NEXT: 1 5 0.50 fcmla z0.h, z0.h, z0.h[0], #0
+# CHECK-NEXT: 1 5 0.50 fcmla z0.s, p0/m, z0.s, z0.s, #0
+# CHECK-NEXT: 1 5 0.50 fcmla z0.s, p0/m, z1.s, z2.s, #90
+# CHECK-NEXT: 1 5 0.50 fcmla z21.s, z10.s, z5.s[1], #90
+# CHECK-NEXT: 1 5 0.50 fcmla z23.s, z13.s, z8.s[0], #270
+# CHECK-NEXT: 1 5 0.50 fcmla z29.d, p7/m, z30.d, z31.d, #180
+# CHECK-NEXT: 1 5 0.50 fcmla z29.h, p7/m, z30.h, z31.h, #180
+# CHECK-NEXT: 1 5 0.50 fcmla z29.s, p7/m, z30.s, z31.s, #180
+# CHECK-NEXT: 1 5 0.50 fcmla z31.d, p7/m, z31.d, z31.d, #270
+# CHECK-NEXT: 1 5 0.50 fcmla z31.h, p7/m, z31.h, z31.h, #270
+# CHECK-NEXT: 1 5 0.50 fcmla z31.h, z31.h, z7.h[3], #270
+# CHECK-NEXT: 1 5 0.50 fcmla z31.s, p7/m, z31.s, z31.s, #270
+# CHECK-NEXT: 1 2 1.00 fcmle p0.d, p0/z, z0.d, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmle p0.h, p0/z, z0.h, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmle p0.s, p0/z, z0.s, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmlt p0.d, p0/z, z0.d, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmlt p0.h, p0/z, z0.h, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmlt p0.s, p0/z, z0.s, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmne p0.d, p0/z, z0.d, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmne p0.d, p0/z, z0.d, z1.d
+# CHECK-NEXT: 1 2 1.00 fcmne p0.h, p0/z, z0.h, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmne p0.h, p0/z, z0.h, z1.h
+# CHECK-NEXT: 1 2 1.00 fcmne p0.s, p0/z, z0.s, #0.0
+# CHECK-NEXT: 1 2 1.00 fcmne p0.s, p0/z, z0.s, z1.s
+# CHECK-NEXT: 1 2 1.00 fcmuo p0.d, p0/z, z0.d, z1.d
+# CHECK-NEXT: 1 2 1.00 fcmuo p0.h, p0/z, z0.h, z1.h
+# CHECK-NEXT: 1 2 1.00 fcmuo p0.s, p0/z, z0.s, z1.s
+# CHECK-NEXT: 1 3 1.00 fcvt z0.d, p0/m, z0.h
+# CHECK-NEXT: 1 3 1.00 fcvt z0.d, p0/m, z0.s
+# CHECK-NEXT: 1 3 1.00 fcvt z0.h, p0/m, z0.d
+# CHECK-NEXT: 2 4 2.00 fcvt z0.h, p0/m, z0.s
+# CHECK-NEXT: 1 3 1.00 fcvt z0.s, p0/m, z0.d
+# CHECK-NEXT: 2 4 2.00 fcvt z0.s, p0/m, z0.h
+# CHECK-NEXT: 2 4 2.00 fcvtlt z0.s, p0/m, z1.h
+# CHECK-NEXT: 1 3 1.00 fcvtlt z30.d, p7/m, z31.s
+# CHECK-NEXT: 2 4 2.00 fcvtnt z0.h, p0/m, z1.s
+# CHECK-NEXT: 1 3 1.00 fcvtnt z30.s, p7/m, z31.d
+# CHECK-NEXT: 1 3 1.00 fcvtx z0.s, p0/m, z0.d
+# CHECK-NEXT: 1 3 1.00 fcvtx z30.s, p7/m, z31.d
+# CHECK-NEXT: 1 3 1.00 fcvtxnt z0.s, p0/m, z1.d
+# CHECK-NEXT: 1 3 1.00 fcvtxnt z30.s, p7/m, z31.d
+# CHECK-NEXT: 1 3 1.00 fcvtzs z0.d, p0/m, z0.d
+# CHECK-NEXT: 1 3 1.00 fcvtzs z0.d, p0/m, z0.h
+# CHECK-NEXT: 1 3 1.00 fcvtzs z0.d, p0/m, z0.s
+# CHECK-NEXT: 4 6 4.00 fcvtzs z0.h, p0/m, z0.h
+# CHECK-NEXT: 1 3 1.00 fcvtzs z0.s, p0/m, z0.d
+# CHECK-NEXT: 2 4 2.00 fcvtzs z0.s, p0/m, z0.h
+# CHECK-NEXT: 2 4 2.00 fcvtzs z0.s, p0/m, z0.s
+# CHECK-NEXT: 1 3 1.00 fcvtzu z0.d, p0/m, z0.d
+# CHECK-NEXT: 1 3 1.00 fcvtzu z0.d, p0/m, z0.h
+# CHECK-NEXT: 1 3 1.00 fcvtzu z0.d, p0/m, z0.s
+# CHECK-NEXT: 4 6 4.00 fcvtzu z0.h, p0/m, z0.h
+# CHECK-NEXT: 1 3 1.00 fcvtzu z0.s, p0/m, z0.d
+# CHECK-NEXT: 2 4 2.00 fcvtzu z0.s, p0/m, z0.h
+# CHECK-NEXT: 2 4 2.00 fcvtzu z0.s, p0/m, z0.s
+# CHECK-NEXT: 1 14 2.00 fdiv z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 13 8.00 fdiv z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 1 11 4.00 fdiv z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 14 2.00 fdivr z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 13 8.00 fdivr z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 1 11 4.00 fdivr z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 3 1.00 fexpa z0.d, z31.d
+# CHECK-NEXT: 1 3 1.00 fexpa z0.h, z31.h
+# CHECK-NEXT: 1 3 1.00 fexpa z0.s, z31.s
+# CHECK-NEXT: 1 3 1.00 flogb z31.d, p7/m, z31.d
+# CHECK-NEXT: 4 6 4.00 flogb z31.h, p7/m, z31.h
+# CHECK-NEXT: 2 4 2.00 flogb z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 4 0.50 fmad z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: 1 4 0.50 fmad z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.50 fmad z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: 1 2 0.50 fmax z0.d, p0/m, z0.d, #0.0
+# CHECK-NEXT: 1 2 0.50 fmax z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 2 0.50 fmax z0.h, p0/m, z0.h, #0.0
+# CHECK-NEXT: 1 2 0.50 fmax z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 1 2 0.50 fmax z0.s, p0/m, z0.s, #0.0
+# CHECK-NEXT: 1 2 0.50 fmax z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 2 0.50 fmax z31.d, p7/m, z31.d, #1.0
+# CHECK-NEXT: 1 2 0.50 fmax z31.h, p7/m, z31.h, #1.0
+# CHECK-NEXT: 1 2 0.50 fmax z31.s, p7/m, z31.s, #1.0
+# CHECK-NEXT: 1 2 0.50 fmaxnm z0.d, p0/m, z0.d, #0.0
+# CHECK-NEXT: 1 2 0.50 fmaxnm z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 2 0.50 fmaxnm z0.h, p0/m, z0.h, #0.0
+# CHECK-NEXT: 1 2 0.50 fmaxnm z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 1 2 0.50 fmaxnm z0.s, p0/m, z0.s, #0.0
+# CHECK-NEXT: 1 2 0.50 fmaxnm z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 2 0.50 fmaxnm z31.d, p7/m, z31.d, #1.0
+# CHECK-NEXT: 1 2 0.50 fmaxnm z31.h, p7/m, z31.h, #1.0
+# CHECK-NEXT: 1 2 0.50 fmaxnm z31.s, p7/m, z31.s, #1.0
+# CHECK-NEXT: 1 2 0.50 fmaxnmp z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.50 fmaxnmp z29.s, p3/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.50 fmaxnmp z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 2 4 1.00 fmaxnmv d0, p7, z31.d
+# CHECK-NEXT: 4 8 2.00 fmaxnmv h0, p7, z31.h
+# CHECK-NEXT: 3 6 1.50 fmaxnmv s0, p7, z31.s
+# CHECK-NEXT: 1 2 0.50 fmaxp z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.50 fmaxp z29.s, p3/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.50 fmaxp z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 2 4 1.00 fmaxv d0, p7, z31.d
+# CHECK-NEXT: 4 8 2.00 fmaxv h0, p7, z31.h
+# CHECK-NEXT: 3 6 1.50 fmaxv s0, p7, z31.s
+# CHECK-NEXT: 1 2 0.50 fmin z0.d, p0/m, z0.d, #0.0
+# CHECK-NEXT: 1 2 0.50 fmin z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 2 0.50 fmin z0.h, p0/m, z0.h, #0.0
+# CHECK-NEXT: 1 2 0.50 fmin z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 1 2 0.50 fmin z0.s, p0/m, z0.s, #0.0
+# CHECK-NEXT: 1 2 0.50 fmin z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 2 0.50 fmin z31.d, p7/m, z31.d, #1.0
+# CHECK-NEXT: 1 2 0.50 fmin z31.h, p7/m, z31.h, #1.0
+# CHECK-NEXT: 1 2 0.50 fmin z31.s, p7/m, z31.s, #1.0
+# CHECK-NEXT: 1 2 0.50 fminnm z0.d, p0/m, z0.d, #0.0
+# CHECK-NEXT: 1 2 0.50 fminnm z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 2 0.50 fminnm z0.h, p0/m, z0.h, #0.0
+# CHECK-NEXT: 1 2 0.50 fminnm z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 1 2 0.50 fminnm z0.s, p0/m, z0.s, #0.0
+# CHECK-NEXT: 1 2 0.50 fminnm z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 2 0.50 fminnm z31.d, p7/m, z31.d, #1.0
+# CHECK-NEXT: 1 2 0.50 fminnm z31.h, p7/m, z31.h, #1.0
+# CHECK-NEXT: 1 2 0.50 fminnm z31.s, p7/m, z31.s, #1.0
+# CHECK-NEXT: 1 2 0.50 fminnmp z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.50 fminnmp z29.s, p3/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.50 fminnmp z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 2 4 1.00 fminnmv d0, p7, z31.d
+# CHECK-NEXT: 4 8 2.00 fminnmv h0, p7, z31.h
+# CHECK-NEXT: 3 6 1.50 fminnmv s0, p7, z31.s
+# CHECK-NEXT: 1 2 0.50 fminp z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.50 fminp z29.s, p3/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.50 fminp z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 2 4 1.00 fminv d0, p7, z31.d
+# CHECK-NEXT: 4 8 2.00 fminv h0, p7, z31.h
+# CHECK-NEXT: 3 6 1.50 fminv s0, p7, z31.s
+# CHECK-NEXT: 1 4 0.50 fmla z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: 1 4 0.50 fmla z0.d, z1.d, z7.d[1]
+# CHECK-NEXT: 1 4 0.50 fmla z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.50 fmla z0.h, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 0.50 fmla z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: 1 4 0.50 fmla z0.s, z1.s, z7.s[3]
+# CHECK-NEXT: 1 4 0.50 fmlalb z0.s, z1.h, z7.h[0]
+# CHECK-NEXT: 1 4 0.50 fmlalb z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 4 0.50 fmlalb z30.s, z31.h, z7.h[7]
+# CHECK-NEXT: 1 4 0.50 fmlalt z0.s, z1.h, z7.h[0]
+# CHECK-NEXT: 1 4 0.50 fmlalt z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 4 0.50 fmlalt z30.s, z31.h, z7.h[7]
+# CHECK-NEXT: 1 4 0.50 fmls z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: 1 4 0.50 fmls z0.d, z1.d, z7.d[1]
+# CHECK-NEXT: 1 4 0.50 fmls z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.50 fmls z0.h, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 0.50 fmls z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: 1 4 0.50 fmls z0.s, z1.s, z7.s[3]
+# CHECK-NEXT: 1 4 0.50 fmlslb z0.s, z1.h, z7.h[0]
+# CHECK-NEXT: 1 4 0.50 fmlslb z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 4 0.50 fmlslb z30.s, z31.h, z7.h[7]
+# CHECK-NEXT: 1 4 0.50 fmlslt z0.s, z1.h, z7.h[0]
+# CHECK-NEXT: 1 4 0.50 fmlslt z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 4 0.50 fmlslt z30.s, z31.h, z7.h[7]
+# CHECK-NEXT: 1 2 0.50 fmov z0.d, #-10.00000000
+# CHECK-NEXT: 1 2 0.50 fmov z0.d, #0.12500000
+# CHECK-NEXT: 1 2 0.50 fmov z0.d, p0/m, #-10.00000000
+# CHECK-NEXT: 1 2 0.50 fmov z0.d, p0/m, #0.12500000
+# CHECK-NEXT: 1 2 0.50 fmov z0.h, #-0.12500000
+# CHECK-NEXT: 1 2 0.50 fmov z0.h, p0/m, #-0.12500000
+# CHECK-NEXT: 1 2 0.50 fmov z0.s, #-0.12500000
+# CHECK-NEXT: 1 2 0.50 fmov z0.s, p0/m, #-0.12500000
+# CHECK-NEXT: 1 4 0.50 fmsb z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: 1 4 0.50 fmsb z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.50 fmsb z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: 1 3 0.50 fmul z0.d, p0/m, z0.d, #0.5
+# CHECK-NEXT: 1 3 0.50 fmul z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 3 0.50 fmul z0.d, z0.d, z0.d[0]
+# CHECK-NEXT: 1 3 0.50 fmul z0.d, z1.d, z31.d
+# CHECK-NEXT: 1 3 0.50 fmul z0.h, p0/m, z0.h, #0.5
+# CHECK-NEXT: 1 3 0.50 fmul z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 1 3 0.50 fmul z0.h, z0.h, z0.h[0]
+# CHECK-NEXT: 1 3 0.50 fmul z0.h, z1.h, z31.h
+# CHECK-NEXT: 1 3 0.50 fmul z0.s, p0/m, z0.s, #0.5
+# CHECK-NEXT: 1 3 0.50 fmul z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 3 0.50 fmul z0.s, z0.s, z0.s[0]
+# CHECK-NEXT: 1 3 0.50 fmul z0.s, z1.s, z31.s
+# CHECK-NEXT: 1 3 0.50 fmul z31.d, p7/m, z31.d, #2.0
+# CHECK-NEXT: 1 3 0.50 fmul z31.d, z31.d, z15.d[1]
+# CHECK-NEXT: 1 3 0.50 fmul z31.h, p7/m, z31.h, #2.0
+# CHECK-NEXT: 1 3 0.50 fmul z31.h, z31.h, z7.h[7]
+# CHECK-NEXT: 1 3 0.50 fmul z31.s, p7/m, z31.s, #2.0
+# CHECK-NEXT: 1 3 0.50 fmul z31.s, z31.s, z7.s[3]
+# CHECK-NEXT: 1 3 0.50 fmulx z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 3 0.50 fmulx z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 1 3 0.50 fmulx z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 2 0.50 fneg z31.d, p7/m, z31.d
+# CHECK-NEXT: 1 2 0.50 fneg z31.h, p7/m, z31.h
+# CHECK-NEXT: 1 2 0.50 fneg z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 4 0.50 fnmad z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: 1 4 0.50 fnmad z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.50 fnmad z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: 1 4 0.50 fnmla z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: 1 4 0.50 fnmla z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.50 fnmla z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: 1 4 0.50 fnmls z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: 1 4 0.50 fnmls z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.50 fnmls z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: 1 4 0.50 fnmsb z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: 1 4 0.50 fnmsb z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.50 fnmsb z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: 1 3 1.00 frecpe z0.d, z31.d
+# CHECK-NEXT: 4 6 4.00 frecpe z0.h, z31.h
+# CHECK-NEXT: 2 4 2.00 frecpe z0.s, z31.s
+# CHECK-NEXT: 1 4 0.50 frecps z0.d, z1.d, z31.d
+# CHECK-NEXT: 1 4 0.50 frecps z0.h, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.50 frecps z0.s, z1.s, z31.s
+# CHECK-NEXT: 1 3 1.00 frecpx z31.d, p7/m, z31.d
+# CHECK-NEXT: 4 6 4.00 frecpx z31.h, p7/m, z31.h
+# CHECK-NEXT: 2 4 2.00 frecpx z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 3 1.00 frinta z31.d, p7/m, z31.d
+# CHECK-NEXT: 4 6 4.00 frinta z31.h, p7/m, z31.h
+# CHECK-NEXT: 2 4 2.00 frinta z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 3 1.00 frinti z31.d, p7/m, z31.d
+# CHECK-NEXT: 4 6 4.00 frinti z31.h, p7/m, z31.h
+# CHECK-NEXT: 2 4 2.00 frinti z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 3 1.00 frintm z31.d, p7/m, z31.d
+# CHECK-NEXT: 4 6 4.00 frintm z31.h, p7/m, z31.h
+# CHECK-NEXT: 2 4 2.00 frintm z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 3 1.00 frintn z31.d, p7/m, z31.d
+# CHECK-NEXT: 4 6 4.00 frintn z31.h, p7/m, z31.h
+# CHECK-NEXT: 2 4 2.00 frintn z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 3 1.00 frintp z31.d, p7/m, z31.d
+# CHECK-NEXT: 4 6 4.00 frintp z31.h, p7/m, z31.h
+# CHECK-NEXT: 2 4 2.00 frintp z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 3 1.00 frintx z31.d, p7/m, z31.d
+# CHECK-NEXT: 4 6 4.00 frintx z31.h, p7/m, z31.h
+# CHECK-NEXT: 2 4 2.00 frintx z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 3 1.00 frintz z31.d, p7/m, z31.d
+# CHECK-NEXT: 4 6 4.00 frintz z31.h, p7/m, z31.h
+# CHECK-NEXT: 2 4 2.00 frintz z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 3 1.00 frsqrte z0.d, z31.d
+# CHECK-NEXT: 4 6 4.00 frsqrte z0.h, z31.h
+# CHECK-NEXT: 2 4 2.00 frsqrte z0.s, z31.s
+# CHECK-NEXT: 1 4 0.50 frsqrts z0.d, z1.d, z31.d
+# CHECK-NEXT: 1 4 0.50 frsqrts z0.h, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.50 frsqrts z0.s, z1.s, z31.s
+# CHECK-NEXT: 1 3 0.50 fscale z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 3 0.50 fscale z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 1 3 0.50 fscale z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 14 2.00 fsqrt z31.d, p7/m, z31.d
+# CHECK-NEXT: 1 13 8.00 fsqrt z31.h, p7/m, z31.h
+# CHECK-NEXT: 1 11 4.00 fsqrt z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 2 0.50 fsub z0.d, p0/m, z0.d, #0.5
+# CHECK-NEXT: 1 2 0.50 fsub z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 2 0.50 fsub z0.d, z1.d, z31.d
+# CHECK-NEXT: 1 2 0.50 fsub z0.h, p0/m, z0.h, #0.5
+# CHECK-NEXT: 1 2 0.50 fsub z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 1 2 0.50 fsub z0.h, z1.h, z31.h
+# CHECK-NEXT: 1 2 0.50 fsub z0.s, p0/m, z0.s, #0.5
+# CHECK-NEXT: 1 2 0.50 fsub z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 2 0.50 fsub z0.s, z1.s, z31.s
+# CHECK-NEXT: 1 2 0.50 fsub z31.d, p7/m, z31.d, #1.0
+# CHECK-NEXT: 1 2 0.50 fsub z31.h, p7/m, z31.h, #1.0
+# CHECK-NEXT: 1 2 0.50 fsub z31.s, p7/m, z31.s, #1.0
+# CHECK-NEXT: 1 2 0.50 fsubr z0.d, p0/m, z0.d, #0.5
+# CHECK-NEXT: 1 2 0.50 fsubr z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 2 0.50 fsubr z0.h, p0/m, z0.h, #0.5
+# CHECK-NEXT: 1 2 0.50 fsubr z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 1 2 0.50 fsubr z0.s, p0/m, z0.s, #0.5
+# CHECK-NEXT: 1 2 0.50 fsubr z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 2 0.50 fsubr z31.d, p7/m, z31.d, #1.0
+# CHECK-NEXT: 1 2 0.50 fsubr z31.h, p7/m, z31.h, #1.0
+# CHECK-NEXT: 1 2 0.50 fsubr z31.s, p7/m, z31.s, #1.0
+# CHECK-NEXT: 1 4 0.50 ftmad z0.d, z0.d, z31.d, #7
+# CHECK-NEXT: 1 4 0.50 ftmad z0.h, z0.h, z31.h, #7
+# CHECK-NEXT: 1 4 0.50 ftmad z0.s, z0.s, z31.s, #7
+# CHECK-NEXT: 1 3 0.50 ftsmul z0.d, z1.d, z31.d
+# CHECK-NEXT: 1 3 0.50 ftsmul z0.h, z1.h, z31.h
+# CHECK-NEXT: 1 3 0.50 ftsmul z0.s, z1.s, z31.s
+# CHECK-NEXT: 1 3 0.50 ftssel z0.d, z1.d, z31.d
+# CHECK-NEXT: 1 3 0.50 ftssel z0.h, z1.h, z31.h
+# CHECK-NEXT: 1 3 0.50 ftssel z0.s, z1.s, z31.s
+# CHECK-NEXT: 1 2 0.50 histcnt z0.s, p0/z, z1.s, z2.s
+# CHECK-NEXT: 1 2 0.50 histcnt z29.d, p7/z, z30.d, z31.d
+# CHECK-NEXT: 1 2 0.50 histseg z0.b, z1.b, z31.b
+# CHECK-NEXT: 1 1 0.13 incb x0
+# CHECK-NEXT: 1 2 0.50 incb x0, #14
+# CHECK-NEXT: 1 2 0.50 incb x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 incb x0, pow2
+# CHECK-NEXT: 1 2 0.50 incb x0, vl1
+# CHECK-NEXT: 1 1 0.13 incd x0
+# CHECK-NEXT: 1 2 0.50 incd x0, #14
+# CHECK-NEXT: 1 2 0.50 incd x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 incd x0, pow2
+# CHECK-NEXT: 1 2 0.50 incd x0, vl1
+# CHECK-NEXT: 1 2 0.50 incd z0.d
+# CHECK-NEXT: 1 2 0.50 incd z0.d, all, mul #16
+# CHECK-NEXT: 1 1 0.13 inch x0
+# CHECK-NEXT: 1 2 0.50 inch x0, #14
+# CHECK-NEXT: 1 2 0.50 inch x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 inch x0, pow2
+# CHECK-NEXT: 1 2 0.50 inch x0, vl1
+# CHECK-NEXT: 1 2 0.50 inch z0.h
+# CHECK-NEXT: 1 2 0.50 inch z0.h, all, mul #16
+# CHECK-NEXT: 1 2 0.50 incp x0, p0.b
+# CHECK-NEXT: 1 2 0.50 incp x0, p0.d
+# CHECK-NEXT: 1 2 0.50 incp x0, p0.h
+# CHECK-NEXT: 1 2 0.50 incp x0, p0.s
+# CHECK-NEXT: 1 2 0.50 incp xzr, p15.b
+# CHECK-NEXT: 1 2 0.50 incp xzr, p15.d
+# CHECK-NEXT: 1 2 0.50 incp xzr, p15.h
+# CHECK-NEXT: 1 2 0.50 incp xzr, p15.s
+# CHECK-NEXT: 3 7 1.00 incp z31.d, p15.d
+# CHECK-NEXT: 3 7 1.00 incp z31.h, p15.h
+# CHECK-NEXT: 3 7 1.00 incp z31.s, p15.s
+# CHECK-NEXT: 1 1 0.13 incw x0
+# CHECK-NEXT: 1 2 0.50 incw x0, #14
+# CHECK-NEXT: 1 2 0.50 incw x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 incw x0, pow2
+# CHECK-NEXT: 1 2 0.50 incw x0, vl1
+# CHECK-NEXT: 1 2 0.50 incw z0.s
+# CHECK-NEXT: 1 2 0.50 incw z0.s, all, mul #16
+# CHECK-NEXT: 1 4 1.00 index z0.b, #0, #0
+# CHECK-NEXT: 2 5 2.00 index z0.d, #0, #0
+# CHECK-NEXT: 1 4 1.00 index z0.h, #0, #0
+# CHECK-NEXT: 2 7 1.00 index z0.h, w0, w0
+# CHECK-NEXT: 1 4 1.00 index z0.s, #0, #0
+# CHECK-NEXT: 2 7 1.00 index z21.b, w10, w21
+# CHECK-NEXT: 4 8 2.00 index z21.d, x10, x21
+# CHECK-NEXT: 2 7 1.00 index z21.s, w10, w21
+# CHECK-NEXT: 2 7 1.00 index z23.b, #13, w8
+# CHECK-NEXT: 2 7 1.00 index z23.b, w13, #8
+# CHECK-NEXT: 4 8 2.00 index z23.d, #13, x8
+# CHECK-NEXT: 4 8 2.00 index z23.d, x13, #8
+# CHECK-NEXT: 2 7 1.00 index z23.h, #13, w8
+# CHECK-NEXT: 2 7 1.00 index z23.h, w13, #8
+# CHECK-NEXT: 2 7 1.00 index z23.s, #13, w8
+# CHECK-NEXT: 2 7 1.00 index z23.s, w13, #8
+# CHECK-NEXT: 1 4 1.00 index z31.b, #-1, #-1
+# CHECK-NEXT: 2 7 1.00 index z31.b, #-1, wzr
+# CHECK-NEXT: 2 7 1.00 index z31.b, wzr, #-1
+# CHECK-NEXT: 2 7 1.00 index z31.b, wzr, wzr
+# CHECK-NEXT: 2 5 2.00 index z31.d, #-1, #-1
+# CHECK-NEXT: 4 8 2.00 index z31.d, #-1, xzr
+# CHECK-NEXT: 4 8 2.00 index z31.d, xzr, #-1
+# CHECK-NEXT: 4 8 2.00 index z31.d, xzr, xzr
+# CHECK-NEXT: 1 4 1.00 index z31.h, #-1, #-1
+# CHECK-NEXT: 2 7 1.00 index z31.h, #-1, wzr
+# CHECK-NEXT: 2 7 1.00 index z31.h, wzr, #-1
+# CHECK-NEXT: 2 7 1.00 index z31.h, wzr, wzr
+# CHECK-NEXT: 1 4 1.00 index z31.s, #-1, #-1
+# CHECK-NEXT: 2 7 1.00 index z31.s, #-1, wzr
+# CHECK-NEXT: 2 7 1.00 index z31.s, wzr, #-1
+# CHECK-NEXT: 2 7 1.00 index z31.s, wzr, wzr
+# CHECK-NEXT: 2 5 1.00 insr z0.b, w0
+# CHECK-NEXT: 2 5 1.00 insr z0.d, x0
+# CHECK-NEXT: 2 5 1.00 insr z0.h, w0
+# CHECK-NEXT: 2 5 1.00 insr z0.s, w0
+# CHECK-NEXT: 1 2 0.50 insr z31.b, b31
+# CHECK-NEXT: 2 5 1.00 insr z31.b, wzr
+# CHECK-NEXT: 1 2 0.50 insr z31.d, d31
+# CHECK-NEXT: 2 5 1.00 insr z31.d, xzr
+# CHECK-NEXT: 1 2 0.50 insr z31.h, h31
+# CHECK-NEXT: 2 5 1.00 insr z31.h, wzr
+# CHECK-NEXT: 1 2 0.50 insr z31.s, s31
+# CHECK-NEXT: 2 5 1.00 insr z31.s, wzr
+# CHECK-NEXT: 1 3 1.00 lasta b0, p7, z31.b
+# CHECK-NEXT: 1 3 1.00 lasta d0, p7, z31.d
+# CHECK-NEXT: 1 3 1.00 lasta h0, p7, z31.h
+# CHECK-NEXT: 1 3 1.00 lasta s0, p7, z31.s
+# CHECK-NEXT: 2 6 1.00 lasta w0, p7, z31.b
+# CHECK-NEXT: 2 6 1.00 lasta w0, p7, z31.h
+# CHECK-NEXT: 2 6 1.00 lasta w0, p7, z31.s
+# CHECK-NEXT: 2 6 1.00 lasta x0, p7, z31.d
+# CHECK-NEXT: 1 3 1.00 lastb b0, p7, z31.b
+# CHECK-NEXT: 1 3 1.00 lastb d0, p7, z31.d
+# CHECK-NEXT: 1 3 1.00 lastb h0, p7, z31.h
+# CHECK-NEXT: 1 3 1.00 lastb s0, p7, z31.s
+# CHECK-NEXT: 2 6 1.00 lastb w0, p7, z31.b
+# CHECK-NEXT: 2 6 1.00 lastb w0, p7, z31.h
+# CHECK-NEXT: 2 6 1.00 lastb w0, p7, z31.s
+# CHECK-NEXT: 2 6 1.00 lastb x0, p7, z31.d
+# CHECK-NEXT: 1 6 0.33 * ld1b { z0.b }, p0/z, [sp, x0]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z0.b }, p0/z, [x0, x0]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z0.b }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 5 9 2.00 * ld1b { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z0.h }, p0/z, [x0]
+# CHECK-NEXT: 5 9 2.00 * ld1b { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: 5 9 2.00 * ld1b { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 5 9 2.00 * ld1b { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z21.b }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 3 9 1.00 * ld1b { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 3 9 1.00 * ld1b { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z21.h }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z21.s }, p5/z, [x10, x21]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z23.d }, p3/z, [x13, x8]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z31.b }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 3 9 1.00 * ld1b { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 5 9 2.00 * ld1b { z31.d }, p7/z, [z31.d, #31]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z31.h }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 5 9 2.00 * ld1b { z31.s }, p7/z, [z31.s, #31]
+# CHECK-NEXT: 1 6 0.33 * ld1b { z5.h }, p3/z, [x17, x16]
+# CHECK-NEXT: 5 10 2.00 * ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
+# CHECK-NEXT: 5 10 2.00 * ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
+# CHECK-NEXT: 1 6 0.33 * ld1d { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 5 9 2.00 * ld1d { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 1 6 0.33 * ld1d { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 3 9 1.00 * ld1d { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 3 9 1.00 * ld1d { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 1 6 0.33 * ld1d { z23.d }, p3/z, [sp, x8, lsl #3]
+# CHECK-NEXT: 1 6 0.33 * ld1d { z23.d }, p3/z, [x13, x8, lsl #3]
+# CHECK-NEXT: 5 10 2.00 * ld1d { z23.d }, p3/z, [x13, z8.d, lsl #3]
+# CHECK-NEXT: 1 6 0.33 * ld1d { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 3 9 1.00 * ld1d { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 5 9 2.00 * ld1d { z31.d }, p7/z, [z31.d, #248]
+# CHECK-NEXT: 5 10 2.00 * ld1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
+# CHECK-NEXT: 5 10 2.00 * ld1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
+# CHECK-NEXT: 1 6 0.33 * ld1h { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 5 9 2.00 * ld1h { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 1 6 0.33 * ld1h { z0.h }, p0/z, [x0]
+# CHECK-NEXT: 5 9 2.00 * ld1h { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: 5 9 2.00 * ld1h { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: 1 6 0.33 * ld1h { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 5 9 2.00 * ld1h { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: 1 6 0.33 * ld1h { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 3 9 1.00 * ld1h { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 3 9 1.00 * ld1h { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 1 6 0.33 * ld1h { z21.h }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ld1h { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ld1h { z21.s }, p5/z, [x10, x21, lsl #1]
+# CHECK-NEXT: 1 6 0.33 * ld1h { z23.d }, p3/z, [x13, x8, lsl #1]
+# CHECK-NEXT: 5 10 2.00 * ld1h { z23.d }, p3/z, [x13, z8.d, lsl #1]
+# CHECK-NEXT: 1 6 0.33 * ld1h { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 3 9 1.00 * ld1h { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 5 9 2.00 * ld1h { z31.d }, p7/z, [z31.d, #62]
+# CHECK-NEXT: 1 6 0.33 * ld1h { z31.h }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ld1h { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 9 10 4.00 * ld1h { z31.s }, p7/z, [sp, z31.s, sxtw #1]
+# CHECK-NEXT: 9 10 4.00 * ld1h { z31.s }, p7/z, [sp, z31.s, uxtw #1]
+# CHECK-NEXT: 5 9 2.00 * ld1h { z31.s }, p7/z, [z31.s, #62]
+# CHECK-NEXT: 1 6 0.33 * ld1h { z5.h }, p3/z, [sp, x16, lsl #1]
+# CHECK-NEXT: 1 6 0.33 * ld1h { z5.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT: 1 6 0.33 * ld1rb { z0.b }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1rb { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1rb { z0.h }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1rb { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1rb { z31.b }, p7/z, [sp, #63]
+# CHECK-NEXT: 1 6 0.33 * ld1rb { z31.d }, p7/z, [sp, #63]
+# CHECK-NEXT: 1 6 0.33 * ld1rb { z31.h }, p7/z, [sp, #63]
+# CHECK-NEXT: 1 6 0.33 * ld1rb { z31.s }, p7/z, [sp, #63]
+# CHECK-NEXT: 1 6 0.33 * ld1rd { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1rd { z31.d }, p7/z, [sp, #504]
+# CHECK-NEXT: 1 6 0.33 * ld1rh { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1rh { z0.h }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1rh { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1rh { z31.d }, p7/z, [sp, #126]
+# CHECK-NEXT: 1 6 0.33 * ld1rh { z31.h }, p7/z, [sp, #126]
+# CHECK-NEXT: 1 6 0.33 * ld1rh { z31.s }, p7/z, [sp, #126]
+# CHECK-NEXT: 1 6 0.33 * ld1rqb { z0.b }, p0/z, [x0, x0]
+# CHECK-NEXT: 1 6 0.33 * ld1rqb { z0.b }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1rqb { z21.b }, p5/z, [x10, #112]
+# CHECK-NEXT: 1 6 0.33 * ld1rqb { z23.b }, p3/z, [x13, #-128]
+# CHECK-NEXT: 1 6 0.33 * ld1rqb { z31.b }, p7/z, [sp, #-16]
+# CHECK-NEXT: 1 6 0.33 * ld1rqd { z0.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: 1 6 0.33 * ld1rqd { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1rqd { z23.d }, p3/z, [x13, #-128]
+# CHECK-NEXT: 1 6 0.33 * ld1rqd { z23.d }, p3/z, [x13, #112]
+# CHECK-NEXT: 1 6 0.33 * ld1rqd { z31.d }, p7/z, [sp, #-16]
+# CHECK-NEXT: 1 6 0.33 * ld1rqh { z0.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: 1 6 0.33 * ld1rqh { z0.h }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1rqh { z23.h }, p3/z, [x13, #-128]
+# CHECK-NEXT: 1 6 0.33 * ld1rqh { z23.h }, p3/z, [x13, #112]
+# CHECK-NEXT: 1 6 0.33 * ld1rqh { z31.h }, p7/z, [sp, #-16]
+# CHECK-NEXT: 1 6 0.33 * ld1rqw { z0.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: 1 6 0.33 * ld1rqw { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1rqw { z23.s }, p3/z, [x13, #-128]
+# CHECK-NEXT: 1 6 0.33 * ld1rqw { z23.s }, p3/z, [x13, #112]
+# CHECK-NEXT: 1 6 0.33 * ld1rqw { z31.s }, p7/z, [sp, #-16]
+# CHECK-NEXT: 1 6 0.33 * ld1rsb { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1rsb { z0.h }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1rsb { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1rsb { z31.d }, p7/z, [sp, #63]
+# CHECK-NEXT: 1 6 0.33 * ld1rsb { z31.h }, p7/z, [sp, #63]
+# CHECK-NEXT: 1 6 0.33 * ld1rsb { z31.s }, p7/z, [sp, #63]
+# CHECK-NEXT: 1 6 0.33 * ld1rsh { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1rsh { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1rsh { z31.d }, p7/z, [sp, #126]
+# CHECK-NEXT: 1 6 0.33 * ld1rsh { z31.s }, p7/z, [sp, #126]
+# CHECK-NEXT: 1 6 0.33 * ld1rsw { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1rsw { z31.d }, p7/z, [sp, #252]
+# CHECK-NEXT: 1 6 0.33 * ld1rw { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1rw { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * ld1rw { z31.d }, p7/z, [sp, #252]
+# CHECK-NEXT: 1 6 0.33 * ld1rw { z31.s }, p7/z, [sp, #252]
+# CHECK-NEXT: 1 6 0.33 * ld1sb { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 5 9 2.00 * ld1sb { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 1 6 0.33 * ld1sb { z0.h }, p0/z, [sp, x0]
+# CHECK-NEXT: 1 6 0.33 * ld1sb { z0.h }, p0/z, [x0, x0]
+# CHECK-NEXT: 1 6 0.33 * ld1sb { z0.h }, p0/z, [x0]
+# CHECK-NEXT: 5 9 2.00 * ld1sb { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: 1 6 0.33 * ld1sb { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 5 9 2.00 * ld1sb { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: 1 6 0.33 * ld1sb { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 3 9 1.00 * ld1sb { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 3 9 1.00 * ld1sb { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 1 6 0.33 * ld1sb { z21.h }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ld1sb { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ld1sb { z21.s }, p5/z, [x10, x21]
+# CHECK-NEXT: 1 6 0.33 * ld1sb { z23.d }, p3/z, [x13, x8]
+# CHECK-NEXT: 1 6 0.33 * ld1sb { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 3 9 1.00 * ld1sb { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 5 9 2.00 * ld1sb { z31.d }, p7/z, [z31.d, #31]
+# CHECK-NEXT: 1 6 0.33 * ld1sb { z31.h }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ld1sb { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 5 9 2.00 * ld1sb { z31.s }, p7/z, [z31.s, #31]
+# CHECK-NEXT: 5 10 2.00 * ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1]
+# CHECK-NEXT: 5 10 2.00 * ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
+# CHECK-NEXT: 1 6 0.33 * ld1sh { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 5 9 2.00 * ld1sh { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 5 9 2.00 * ld1sh { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: 5 9 2.00 * ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: 1 6 0.33 * ld1sh { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 5 9 2.00 * ld1sh { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: 1 6 0.33 * ld1sh { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 3 9 1.00 * ld1sh { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 3 9 1.00 * ld1sh { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 1 6 0.33 * ld1sh { z21.s }, p5/z, [sp, x21, lsl #1]
+# CHECK-NEXT: 1 6 0.33 * ld1sh { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ld1sh { z21.s }, p5/z, [x10, x21, lsl #1]
+# CHECK-NEXT: 1 6 0.33 * ld1sh { z23.d }, p3/z, [x13, x8, lsl #1]
+# CHECK-NEXT: 5 10 2.00 * ld1sh { z23.d }, p3/z, [x13, z8.d, lsl #1]
+# CHECK-NEXT: 1 6 0.33 * ld1sh { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 3 9 1.00 * ld1sh { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 5 9 2.00 * ld1sh { z31.d }, p7/z, [z31.d, #62]
+# CHECK-NEXT: 1 6 0.33 * ld1sh { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 9 10 4.00 * ld1sh { z31.s }, p7/z, [sp, z31.s, sxtw #1]
+# CHECK-NEXT: 9 10 4.00 * ld1sh { z31.s }, p7/z, [sp, z31.s, uxtw #1]
+# CHECK-NEXT: 5 9 2.00 * ld1sh { z31.s }, p7/z, [z31.s, #62]
+# CHECK-NEXT: 5 10 2.00 * ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2]
+# CHECK-NEXT: 5 10 2.00 * ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2]
+# CHECK-NEXT: 1 6 0.33 * ld1sw { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 5 9 2.00 * ld1sw { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 1 6 0.33 * ld1sw { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 3 9 1.00 * ld1sw { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 3 9 1.00 * ld1sw { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 1 6 0.33 * ld1sw { z23.d }, p3/z, [sp, x8, lsl #2]
+# CHECK-NEXT: 1 6 0.33 * ld1sw { z23.d }, p3/z, [x13, x8, lsl #2]
+# CHECK-NEXT: 5 10 2.00 * ld1sw { z23.d }, p3/z, [x13, z8.d, lsl #2]
+# CHECK-NEXT: 1 6 0.33 * ld1sw { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 3 9 1.00 * ld1sw { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 5 9 2.00 * ld1sw { z31.d }, p7/z, [z31.d, #124]
+# CHECK-NEXT: 5 10 2.00 * ld1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
+# CHECK-NEXT: 5 10 2.00 * ld1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
+# CHECK-NEXT: 1 6 0.33 * ld1w { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 5 9 2.00 * ld1w { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 5 9 2.00 * ld1w { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: 5 9 2.00 * ld1w { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: 1 6 0.33 * ld1w { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 5 9 2.00 * ld1w { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: 1 6 0.33 * ld1w { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 3 9 1.00 * ld1w { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 3 9 1.00 * ld1w { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 1 6 0.33 * ld1w { z21.s }, p5/z, [sp, x21, lsl #2]
+# CHECK-NEXT: 1 6 0.33 * ld1w { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ld1w { z21.s }, p5/z, [x10, x21, lsl #2]
+# CHECK-NEXT: 1 6 0.33 * ld1w { z23.d }, p3/z, [x13, x8, lsl #2]
+# CHECK-NEXT: 5 10 2.00 * ld1w { z23.d }, p3/z, [x13, z8.d, lsl #2]
+# CHECK-NEXT: 1 6 0.33 * ld1w { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 3 9 1.00 * ld1w { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 5 9 2.00 * ld1w { z31.d }, p7/z, [z31.d, #124]
+# CHECK-NEXT: 1 6 0.33 * ld1w { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 9 10 4.00 * ld1w { z31.s }, p7/z, [sp, z31.s, sxtw #2]
+# CHECK-NEXT: 9 10 4.00 * ld1w { z31.s }, p7/z, [sp, z31.s, uxtw #2]
+# CHECK-NEXT: 5 9 2.00 * ld1w { z31.s }, p7/z, [z31.s, #124]
+# CHECK-NEXT: 6 9 1.00 * ld2b { z0.b, z1.b }, p0/z, [x0, x0]
+# CHECK-NEXT: 4 8 1.00 * ld2b { z0.b, z1.b }, p0/z, [x0]
+# CHECK-NEXT: 4 8 1.00 * ld2b { z21.b, z22.b }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT: 4 8 1.00 * ld2b { z23.b, z24.b }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT: 6 9 1.00 * ld2b { z5.b, z6.b }, p3/z, [x17, x16]
+# CHECK-NEXT: 6 9 1.00 * ld2d { z0.d, z1.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: 4 8 1.00 * ld2d { z0.d, z1.d }, p0/z, [x0]
+# CHECK-NEXT: 4 8 1.00 * ld2d { z21.d, z22.d }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT: 4 8 1.00 * ld2d { z23.d, z24.d }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT: 6 9 1.00 * ld2d { z5.d, z6.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT: 6 9 1.00 * ld2h { z0.h, z1.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: 4 8 1.00 * ld2h { z0.h, z1.h }, p0/z, [x0]
+# CHECK-NEXT: 4 8 1.00 * ld2h { z21.h, z22.h }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT: 4 8 1.00 * ld2h { z23.h, z24.h }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT: 6 9 1.00 * ld2h { z5.h, z6.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT: 6 9 1.00 * ld2w { z0.s, z1.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: 4 8 1.00 * ld2w { z0.s, z1.s }, p0/z, [x0]
+# CHECK-NEXT: 4 8 1.00 * ld2w { z21.s, z22.s }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT: 4 8 1.00 * ld2w { z23.s, z24.s }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT: 6 9 1.00 * ld2w { z5.s, z6.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT: 9 10 1.50 * ld3b { z0.b - z2.b }, p0/z, [x0, x0]
+# CHECK-NEXT: 6 9 1.50 * ld3b { z0.b - z2.b }, p0/z, [x0]
+# CHECK-NEXT: 6 9 1.50 * ld3b { z21.b - z23.b }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: 6 9 1.50 * ld3b { z23.b - z25.b }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: 9 10 1.50 * ld3b { z5.b - z7.b }, p3/z, [x17, x16]
+# CHECK-NEXT: 9 10 1.50 * ld3d { z0.d - z2.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: 6 9 1.50 * ld3d { z0.d - z2.d }, p0/z, [x0]
+# CHECK-NEXT: 6 9 1.50 * ld3d { z21.d - z23.d }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: 6 9 1.50 * ld3d { z23.d - z25.d }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: 9 10 1.50 * ld3d { z5.d - z7.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT: 9 10 1.50 * ld3h { z0.h - z2.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: 6 9 1.50 * ld3h { z0.h - z2.h }, p0/z, [x0]
+# CHECK-NEXT: 6 9 1.50 * ld3h { z21.h - z23.h }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: 6 9 1.50 * ld3h { z23.h - z25.h }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: 9 10 1.50 * ld3h { z5.h - z7.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT: 9 10 1.50 * ld3w { z0.s - z2.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: 6 9 1.50 * ld3w { z0.s - z2.s }, p0/z, [x0]
+# CHECK-NEXT: 6 9 1.50 * ld3w { z21.s - z23.s }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: 6 9 1.50 * ld3w { z23.s - z25.s }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: 9 10 1.50 * ld3w { z5.s - z7.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT: 16 10 4.00 * ld4b { z0.b - z3.b }, p0/z, [x0, x0]
+# CHECK-NEXT: 12 9 4.00 * ld4b { z0.b - z3.b }, p0/z, [x0]
+# CHECK-NEXT: 12 9 4.00 * ld4b { z21.b - z24.b }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: 12 9 4.00 * ld4b { z23.b - z26.b }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: 16 10 4.00 * ld4b { z5.b - z8.b }, p3/z, [x17, x16]
+# CHECK-NEXT: 16 10 4.00 * ld4d { z0.d - z3.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: 12 9 4.00 * ld4d { z0.d - z3.d }, p0/z, [x0]
+# CHECK-NEXT: 12 9 4.00 * ld4d { z21.d - z24.d }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: 12 9 4.00 * ld4d { z23.d - z26.d }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: 16 10 4.00 * ld4d { z5.d - z8.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT: 16 10 4.00 * ld4h { z0.h - z3.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: 12 9 4.00 * ld4h { z0.h - z3.h }, p0/z, [x0]
+# CHECK-NEXT: 12 9 4.00 * ld4h { z21.h - z24.h }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: 12 9 4.00 * ld4h { z23.h - z26.h }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: 16 10 4.00 * ld4h { z5.h - z8.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT: 16 10 4.00 * ld4w { z0.s - z3.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: 12 9 4.00 * ld4w { z0.s - z3.s }, p0/z, [x0]
+# CHECK-NEXT: 12 9 4.00 * ld4w { z21.s - z24.s }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: 12 9 4.00 * ld4w { z23.s - z26.s }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: 16 10 4.00 * ld4w { z5.s - z8.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT: 2 6 0.33 * U ldff1b { z0.d }, p0/z, [x0, x0]
+# CHECK-NEXT: 5 9 2.00 * U ldff1b { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 2 6 0.33 * U ldff1b { z0.h }, p0/z, [x0, x0]
+# CHECK-NEXT: 2 6 0.33 * U ldff1b { z0.s }, p0/z, [x0, x0]
+# CHECK-NEXT: 5 9 2.00 * U ldff1b { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: 5 9 2.00 * U ldff1b { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: 5 9 2.00 * U ldff1b { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: 3 9 1.00 * U ldff1b { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 3 9 1.00 * U ldff1b { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 2 6 0.33 * U ldff1b { z31.b }, p7/z, [sp]
+# CHECK-NEXT: 3 9 1.00 * U ldff1b { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 2 6 0.33 * U ldff1b { z31.d }, p7/z, [sp]
+# CHECK-NEXT: 5 9 2.00 * U ldff1b { z31.d }, p7/z, [z31.d, #31]
+# CHECK-NEXT: 2 6 0.33 * U ldff1b { z31.h }, p7/z, [sp]
+# CHECK-NEXT: 2 6 0.33 * U ldff1b { z31.s }, p7/z, [sp]
+# CHECK-NEXT: 5 9 2.00 * U ldff1b { z31.s }, p7/z, [z31.s, #31]
+# CHECK-NEXT: 2 6 0.33 * U ldff1d { z0.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: 5 10 2.00 * U ldff1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
+# CHECK-NEXT: 5 10 2.00 * U ldff1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
+# CHECK-NEXT: 5 9 2.00 * U ldff1d { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 3 9 1.00 * U ldff1d { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 3 9 1.00 * U ldff1d { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 5 10 2.00 * U ldff1d { z23.d }, p3/z, [x13, z8.d, lsl #3]
+# CHECK-NEXT: 3 9 1.00 * U ldff1d { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 2 6 0.33 * U ldff1d { z31.d }, p7/z, [sp]
+# CHECK-NEXT: 5 9 2.00 * U ldff1d { z31.d }, p7/z, [z31.d, #248]
+# CHECK-NEXT: 2 6 0.33 * U ldff1h { z0.d }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: 5 10 2.00 * U ldff1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
+# CHECK-NEXT: 5 10 2.00 * U ldff1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
+# CHECK-NEXT: 5 9 2.00 * U ldff1h { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 2 6 0.33 * U ldff1h { z0.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: 2 6 0.33 * U ldff1h { z0.s }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: 5 9 2.00 * U ldff1h { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: 5 9 2.00 * U ldff1h { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: 5 9 2.00 * U ldff1h { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: 3 9 1.00 * U ldff1h { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 3 9 1.00 * U ldff1h { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 5 10 2.00 * U ldff1h { z23.d }, p3/z, [x13, z8.d, lsl #1]
+# CHECK-NEXT: 3 9 1.00 * U ldff1h { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 2 6 0.33 * U ldff1h { z31.d }, p7/z, [sp]
+# CHECK-NEXT: 5 9 2.00 * U ldff1h { z31.d }, p7/z, [z31.d, #62]
+# CHECK-NEXT: 2 6 0.33 * U ldff1h { z31.h }, p7/z, [sp]
+# CHECK-NEXT: 9 10 4.00 * U ldff1h { z31.s }, p7/z, [sp, z31.s, sxtw #1]
+# CHECK-NEXT: 9 10 4.00 * U ldff1h { z31.s }, p7/z, [sp, z31.s, uxtw #1]
+# CHECK-NEXT: 2 6 0.33 * U ldff1h { z31.s }, p7/z, [sp]
+# CHECK-NEXT: 5 9 2.00 * U ldff1h { z31.s }, p7/z, [z31.s, #62]
+# CHECK-NEXT: 2 6 0.33 * U ldff1sb { z0.d }, p0/z, [x0, x0]
+# CHECK-NEXT: 5 9 2.00 * U ldff1sb { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 2 6 0.33 * U ldff1sb { z0.h }, p0/z, [x0, x0]
+# CHECK-NEXT: 2 6 0.33 * U ldff1sb { z0.s }, p0/z, [x0, x0]
+# CHECK-NEXT: 5 9 2.00 * U ldff1sb { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: 5 9 2.00 * U ldff1sb { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: 5 9 2.00 * U ldff1sb { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: 3 9 1.00 * U ldff1sb { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 3 9 1.00 * U ldff1sb { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 3 9 1.00 * U ldff1sb { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 2 6 0.33 * U ldff1sb { z31.d }, p7/z, [sp]
+# CHECK-NEXT: 5 9 2.00 * U ldff1sb { z31.d }, p7/z, [z31.d, #31]
+# CHECK-NEXT: 2 6 0.33 * U ldff1sb { z31.h }, p7/z, [sp]
+# CHECK-NEXT: 2 6 0.33 * U ldff1sb { z31.s }, p7/z, [sp]
+# CHECK-NEXT: 5 9 2.00 * U ldff1sb { z31.s }, p7/z, [z31.s, #31]
+# CHECK-NEXT: 2 6 0.33 * U ldff1sh { z0.d }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: 5 10 2.00 * U ldff1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1]
+# CHECK-NEXT: 5 10 2.00 * U ldff1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
+# CHECK-NEXT: 5 9 2.00 * U ldff1sh { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 2 6 0.33 * U ldff1sh { z0.s }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: 5 9 2.00 * U ldff1sh { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: 5 9 2.00 * U ldff1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: 5 9 2.00 * U ldff1sh { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: 3 9 1.00 * U ldff1sh { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 3 9 1.00 * U ldff1sh { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 5 10 2.00 * U ldff1sh { z23.d }, p3/z, [x13, z8.d, lsl #1]
+# CHECK-NEXT: 3 9 1.00 * U ldff1sh { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 2 6 0.33 * U ldff1sh { z31.d }, p7/z, [sp]
+# CHECK-NEXT: 5 9 2.00 * U ldff1sh { z31.d }, p7/z, [z31.d, #62]
+# CHECK-NEXT: 9 10 4.00 * U ldff1sh { z31.s }, p7/z, [sp, z31.s, sxtw #1]
+# CHECK-NEXT: 9 10 4.00 * U ldff1sh { z31.s }, p7/z, [sp, z31.s, uxtw #1]
+# CHECK-NEXT: 2 6 0.33 * U ldff1sh { z31.s }, p7/z, [sp]
+# CHECK-NEXT: 5 9 2.00 * U ldff1sh { z31.s }, p7/z, [z31.s, #62]
+# CHECK-NEXT: 2 6 0.33 * U ldff1sw { z0.d }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: 5 10 2.00 * U ldff1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2]
+# CHECK-NEXT: 5 10 2.00 * U ldff1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2]
+# CHECK-NEXT: 5 9 2.00 * U ldff1sw { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 3 9 1.00 * U ldff1sw { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 3 9 1.00 * U ldff1sw { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 5 10 2.00 * U ldff1sw { z23.d }, p3/z, [x13, z8.d, lsl #2]
+# CHECK-NEXT: 3 9 1.00 * U ldff1sw { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 2 6 0.33 * U ldff1sw { z31.d }, p7/z, [sp]
+# CHECK-NEXT: 5 9 2.00 * U ldff1sw { z31.d }, p7/z, [z31.d, #124]
+# CHECK-NEXT: 2 6 0.33 * U ldff1w { z0.d }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: 5 10 2.00 * U ldff1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
+# CHECK-NEXT: 5 10 2.00 * U ldff1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
+# CHECK-NEXT: 5 9 2.00 * U ldff1w { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: 2 6 0.33 * U ldff1w { z0.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: 5 9 2.00 * U ldff1w { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: 5 9 2.00 * U ldff1w { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: 5 9 2.00 * U ldff1w { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: 3 9 1.00 * U ldff1w { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: 3 9 1.00 * U ldff1w { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: 5 10 2.00 * U ldff1w { z23.d }, p3/z, [x13, z8.d, lsl #2]
+# CHECK-NEXT: 3 9 1.00 * U ldff1w { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: 2 6 0.33 * U ldff1w { z31.d }, p7/z, [sp]
+# CHECK-NEXT: 5 9 2.00 * U ldff1w { z31.d }, p7/z, [z31.d, #124]
+# CHECK-NEXT: 9 10 4.00 * U ldff1w { z31.s }, p7/z, [sp, z31.s, sxtw #2]
+# CHECK-NEXT: 9 10 4.00 * U ldff1w { z31.s }, p7/z, [sp, z31.s, uxtw #2]
+# CHECK-NEXT: 2 6 0.33 * U ldff1w { z31.s }, p7/z, [sp]
+# CHECK-NEXT: 5 9 2.00 * U ldff1w { z31.s }, p7/z, [z31.s, #124]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1b { z0.b }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1b { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1b { z0.h }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1b { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1b { z21.b }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1b { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1b { z21.h }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1b { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1b { z31.b }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1b { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1b { z31.h }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1b { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1d { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1d { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1d { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1h { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1h { z0.h }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1h { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1h { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1h { z21.h }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1h { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1h { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1h { z31.h }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1h { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1sb { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1sb { z0.h }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1sb { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1sb { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1sb { z21.h }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1sb { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1sb { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1sb { z31.h }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1sb { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1sh { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1sh { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1sh { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1sh { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1sh { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1sh { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1sw { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1sw { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1sw { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1w { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1w { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1w { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1w { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1w { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * U ldnf1w { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ldnt1b { z0.b }, p0/z, [x0, x0]
+# CHECK-NEXT: 1 6 0.33 * ldnt1b { z0.b }, p0/z, [x0]
+# CHECK-NEXT: 4 9 1.00 * ldnt1b { z0.d }, p0/z, [z1.d]
+# CHECK-NEXT: 6 9 2.00 * ldnt1b { z0.s }, p0/z, [z1.s]
+# CHECK-NEXT: 1 6 0.33 * ldnt1b { z21.b }, p5/z, [x10, #7, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ldnt1b { z23.b }, p3/z, [x13, #-8, mul vl]
+# CHECK-NEXT: 4 9 1.00 * ldnt1b { z31.d }, p7/z, [z31.d, x0]
+# CHECK-NEXT: 4 9 1.00 * ldnt1b { z31.d }, p7/z, [z31.d]
+# CHECK-NEXT: 6 9 2.00 * ldnt1b { z31.s }, p7/z, [z31.s, x0]
+# CHECK-NEXT: 6 9 2.00 * ldnt1b { z31.s }, p7/z, [z31.s]
+# CHECK-NEXT: 1 6 0.33 * ldnt1d { z0.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: 1 6 0.33 * ldnt1d { z0.d }, p0/z, [x0]
+# CHECK-NEXT: 4 9 1.00 * ldnt1d { z0.d }, p0/z, [z1.d]
+# CHECK-NEXT: 1 6 0.33 * ldnt1d { z21.d }, p5/z, [x10, #7, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ldnt1d { z23.d }, p3/z, [x13, #-8, mul vl]
+# CHECK-NEXT: 4 9 1.00 * ldnt1d { z31.d }, p7/z, [z31.d, x0]
+# CHECK-NEXT: 4 9 1.00 * ldnt1d { z31.d }, p7/z, [z31.d]
+# CHECK-NEXT: 4 9 1.00 * ldnt1h { z0.d }, p0/z, [z1.d]
+# CHECK-NEXT: 1 6 0.33 * ldnt1h { z0.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: 1 6 0.33 * ldnt1h { z0.h }, p0/z, [x0]
+# CHECK-NEXT: 6 9 2.00 * ldnt1h { z0.s }, p0/z, [z1.s]
+# CHECK-NEXT: 1 6 0.33 * ldnt1h { z21.h }, p5/z, [x10, #7, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ldnt1h { z23.h }, p3/z, [x13, #-8, mul vl]
+# CHECK-NEXT: 4 9 1.00 * ldnt1h { z31.d }, p7/z, [z31.d, x0]
+# CHECK-NEXT: 4 9 1.00 * ldnt1h { z31.d }, p7/z, [z31.d]
+# CHECK-NEXT: 6 9 2.00 * ldnt1h { z31.s }, p7/z, [z31.s, x0]
+# CHECK-NEXT: 6 9 2.00 * ldnt1h { z31.s }, p7/z, [z31.s]
+# CHECK-NEXT: 4 9 1.00 * ldnt1sb { z0.d }, p0/z, [z1.d]
+# CHECK-NEXT: 6 9 2.00 * ldnt1sb { z0.s }, p0/z, [z1.s]
+# CHECK-NEXT: 4 9 1.00 * ldnt1sb { z31.d }, p7/z, [z31.d, x0]
+# CHECK-NEXT: 4 9 1.00 * ldnt1sb { z31.d }, p7/z, [z31.d]
+# CHECK-NEXT: 6 9 2.00 * ldnt1sb { z31.s }, p7/z, [z31.s, x0]
+# CHECK-NEXT: 6 9 2.00 * ldnt1sb { z31.s }, p7/z, [z31.s]
+# CHECK-NEXT: 4 9 1.00 * ldnt1sh { z0.d }, p0/z, [z1.d]
+# CHECK-NEXT: 6 9 2.00 * ldnt1sh { z0.s }, p0/z, [z1.s]
+# CHECK-NEXT: 4 9 1.00 * ldnt1sh { z31.d }, p7/z, [z31.d, x0]
+# CHECK-NEXT: 4 9 1.00 * ldnt1sh { z31.d }, p7/z, [z31.d]
+# CHECK-NEXT: 6 9 2.00 * ldnt1sh { z31.s }, p7/z, [z31.s, x0]
+# CHECK-NEXT: 6 9 2.00 * ldnt1sh { z31.s }, p7/z, [z31.s]
+# CHECK-NEXT: 4 9 1.00 * ldnt1sw { z0.d }, p0/z, [z1.d]
+# CHECK-NEXT: 4 9 1.00 * ldnt1sw { z31.d }, p7/z, [z31.d, x0]
+# CHECK-NEXT: 4 9 1.00 * ldnt1sw { z31.d }, p7/z, [z31.d]
+# CHECK-NEXT: 4 9 1.00 * ldnt1w { z0.d }, p0/z, [z1.d]
+# CHECK-NEXT: 1 6 0.33 * ldnt1w { z0.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: 1 6 0.33 * ldnt1w { z0.s }, p0/z, [x0]
+# CHECK-NEXT: 6 9 2.00 * ldnt1w { z0.s }, p0/z, [z1.s]
+# CHECK-NEXT: 1 6 0.33 * ldnt1w { z21.s }, p5/z, [x10, #7, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ldnt1w { z23.s }, p3/z, [x13, #-8, mul vl]
+# CHECK-NEXT: 4 9 1.00 * ldnt1w { z31.d }, p7/z, [z31.d, x0]
+# CHECK-NEXT: 4 9 1.00 * ldnt1w { z31.d }, p7/z, [z31.d]
+# CHECK-NEXT: 6 9 2.00 * ldnt1w { z31.s }, p7/z, [z31.s, x0]
+# CHECK-NEXT: 6 9 2.00 * ldnt1w { z31.s }, p7/z, [z31.s]
+# CHECK-NEXT: 2 6 0.50 * ldr p0, [x0]
+# CHECK-NEXT: 2 6 0.50 * ldr p5, [x10, #255, mul vl]
+# CHECK-NEXT: 2 6 0.50 * ldr p7, [x13, #-256, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ldr z0, [x0]
+# CHECK-NEXT: 1 6 0.33 * ldr z23, [x13, #255, mul vl]
+# CHECK-NEXT: 1 6 0.33 * ldr z31, [sp, #-256, mul vl]
+# CHECK-NEXT: 1 2 1.00 lsl z0.b, p0/m, z0.b, #0
+# CHECK-NEXT: 1 2 1.00 lsl z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: 1 2 1.00 lsl z0.b, p0/m, z0.b, z1.d
+# CHECK-NEXT: 1 2 1.00 lsl z0.b, z0.b, #0
+# CHECK-NEXT: 1 2 1.00 lsl z0.b, z1.b, z2.d
+# CHECK-NEXT: 1 2 1.00 lsl z0.d, p0/m, z0.d, #0
+# CHECK-NEXT: 1 2 1.00 lsl z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: 1 2 1.00 lsl z0.d, z0.d, #0
+# CHECK-NEXT: 1 2 1.00 lsl z0.h, p0/m, z0.h, #0
+# CHECK-NEXT: 1 2 1.00 lsl z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: 1 2 1.00 lsl z0.h, p0/m, z0.h, z1.d
+# CHECK-NEXT: 1 2 1.00 lsl z0.h, z0.h, #0
+# CHECK-NEXT: 1 2 1.00 lsl z0.h, z1.h, z2.d
+# CHECK-NEXT: 1 2 1.00 lsl z0.s, p0/m, z0.s, #0
+# CHECK-NEXT: 1 2 1.00 lsl z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: 1 2 1.00 lsl z0.s, p0/m, z0.s, z1.d
+# CHECK-NEXT: 1 2 1.00 lsl z0.s, z0.s, #0
+# CHECK-NEXT: 1 2 1.00 lsl z0.s, z1.s, z2.d
+# CHECK-NEXT: 1 2 1.00 lsl z31.b, p0/m, z31.b, #7
+# CHECK-NEXT: 1 2 1.00 lsl z31.b, z31.b, #7
+# CHECK-NEXT: 1 2 1.00 lsl z31.d, p0/m, z31.d, #63
+# CHECK-NEXT: 1 2 1.00 lsl z31.d, z31.d, #63
+# CHECK-NEXT: 1 2 1.00 lsl z31.h, p0/m, z31.h, #15
+# CHECK-NEXT: 1 2 1.00 lsl z31.h, z31.h, #15
+# CHECK-NEXT: 1 2 1.00 lsl z31.s, p0/m, z31.s, #31
+# CHECK-NEXT: 1 2 1.00 lsl z31.s, z31.s, #31
+# CHECK-NEXT: 1 2 1.00 lslr z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: 1 2 1.00 lslr z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: 1 2 1.00 lslr z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: 1 2 1.00 lslr z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: 1 2 1.00 lsr z0.b, p0/m, z0.b, #1
+# CHECK-NEXT: 1 2 1.00 lsr z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: 1 2 1.00 lsr z0.b, p0/m, z0.b, z1.d
+# CHECK-NEXT: 1 2 1.00 lsr z0.b, z0.b, #1
+# CHECK-NEXT: 1 2 1.00 lsr z0.b, z1.b, z2.d
+# CHECK-NEXT: 1 2 1.00 lsr z0.d, p0/m, z0.d, #1
+# CHECK-NEXT: 1 2 1.00 lsr z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: 1 2 1.00 lsr z0.d, z0.d, #1
+# CHECK-NEXT: 1 2 1.00 lsr z0.h, p0/m, z0.h, #1
+# CHECK-NEXT: 1 2 1.00 lsr z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: 1 2 1.00 lsr z0.h, p0/m, z0.h, z1.d
+# CHECK-NEXT: 1 2 1.00 lsr z0.h, z0.h, #1
+# CHECK-NEXT: 1 2 1.00 lsr z0.h, z1.h, z2.d
+# CHECK-NEXT: 1 2 1.00 lsr z0.s, p0/m, z0.s, #1
+# CHECK-NEXT: 1 2 1.00 lsr z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: 1 2 1.00 lsr z0.s, p0/m, z0.s, z1.d
+# CHECK-NEXT: 1 2 1.00 lsr z0.s, z0.s, #1
+# CHECK-NEXT: 1 2 1.00 lsr z0.s, z1.s, z2.d
+# CHECK-NEXT: 1 2 1.00 lsr z31.b, p0/m, z31.b, #8
+# CHECK-NEXT: 1 2 1.00 lsr z31.b, z31.b, #8
+# CHECK-NEXT: 1 2 1.00 lsr z31.d, p0/m, z31.d, #64
+# CHECK-NEXT: 1 2 1.00 lsr z31.d, z31.d, #64
+# CHECK-NEXT: 1 2 1.00 lsr z31.h, p0/m, z31.h, #16
+# CHECK-NEXT: 1 2 1.00 lsr z31.h, z31.h, #16
+# CHECK-NEXT: 1 2 1.00 lsr z31.s, p0/m, z31.s, #32
+# CHECK-NEXT: 1 2 1.00 lsr z31.s, z31.s, #32
+# CHECK-NEXT: 1 2 1.00 lsrr z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: 1 2 1.00 lsrr z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: 1 2 1.00 lsrr z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: 1 2 1.00 lsrr z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: 1 4 1.00 mad z0.b, p7/m, z1.b, z31.b
+# CHECK-NEXT: 1 5 2.00 mad z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: 1 4 1.00 mad z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: 1 4 1.00 mad z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: 2 3 1.00 match p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: 2 3 1.00 match p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: 2 2 1.00 match p15.b, p7/z, z30.b, z31.b
+# CHECK-NEXT: 2 2 1.00 match p15.h, p7/z, z30.h, z31.h
+# CHECK-NEXT: 1 4 1.00 mla z0.b, p7/m, z1.b, z31.b
+# CHECK-NEXT: 1 5 2.00 mla z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: 1 5 2.00 mla z0.d, z1.d, z7.d[1]
+# CHECK-NEXT: 1 4 1.00 mla z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: 1 4 1.00 mla z0.h, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 1.00 mla z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: 1 4 1.00 mla z0.s, z1.s, z7.s[3]
+# CHECK-NEXT: 1 4 1.00 mls z0.b, p7/m, z1.b, z31.b
+# CHECK-NEXT: 1 5 2.00 mls z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: 1 5 2.00 mls z0.d, z1.d, z7.d[1]
+# CHECK-NEXT: 1 4 1.00 mls z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: 1 4 1.00 mls z0.h, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 1.00 mls z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: 1 4 1.00 mls z0.s, z1.s, z7.s[3]
+# CHECK-NEXT: 1 2 0.50 mov p0.b, p0.b
+# CHECK-NEXT: 1 1 0.50 mov p0.b, p0/m, p0.b
+# CHECK-NEXT: 1 2 0.50 mov p0.b, p0/z, p0.b
+# CHECK-NEXT: 1 2 0.50 mov p15.b, p15.b
+# CHECK-NEXT: 1 1 0.50 mov p15.b, p15/m, p15.b
+# CHECK-NEXT: 1 2 0.50 mov p15.b, p15/z, p15.b
+# CHECK-NEXT: 1 2 0.50 mov z0.b, #127
+# CHECK-NEXT: 1 2 0.50 mov z0.b, b0
+# CHECK-NEXT: 1 2 0.50 mov z0.b, p0/m, b0
+# CHECK-NEXT: 2 5 1.00 mov z0.b, p0/m, w0
+# CHECK-NEXT: 1 2 0.50 mov z0.b, p0/z, #127
+# CHECK-NEXT: 1 3 1.00 mov z0.b, w0
+# CHECK-NEXT: 1 2 0.50 mov z0.d, #0
+# CHECK-NEXT: 1 2 0.50 mov z0.d, #0xe0000000000003ff
+# CHECK-NEXT: 1 2 0.50 mov z0.d, #0xffffffffffff7fff
+# CHECK-NEXT: 1 2 0.50 mov z0.d, #32768
+# CHECK-NEXT: 1 2 0.50 mov z0.d, d0
+# CHECK-NEXT: 1 2 0.50 mov z0.d, p0/m, d0
+# CHECK-NEXT: 2 5 1.00 mov z0.d, p0/m, x0
+# CHECK-NEXT: 1 3 1.00 mov z0.d, x0
+# CHECK-NEXT: 1 2 0.50 mov z0.d, z0.d
+# CHECK-NEXT: 1 2 0.50 mov z0.h, #-256
+# CHECK-NEXT: 1 2 0.50 mov z0.h, #-32768
+# CHECK-NEXT: 1 2 0.50 mov z0.h, #0
+# CHECK-NEXT: 1 2 0.50 mov z0.h, #32512
+# CHECK-NEXT: 1 2 0.50 mov z0.h, #32767
+# CHECK-NEXT: 1 2 0.50 mov z0.h, h0
+# CHECK-NEXT: 1 2 0.50 mov z0.h, p0/m, h0
+# CHECK-NEXT: 2 5 1.00 mov z0.h, p0/m, w0
+# CHECK-NEXT: 1 2 0.50 mov z0.h, p0/z, #32512
+# CHECK-NEXT: 1 3 1.00 mov z0.h, w0
+# CHECK-NEXT: 1 2 0.50 mov z0.q, q0
+# CHECK-NEXT: 1 2 0.50 mov z0.s, #0
+# CHECK-NEXT: 1 2 0.50 mov z0.s, #0xffff7fff
+# CHECK-NEXT: 1 2 0.50 mov z0.s, #32768
+# CHECK-NEXT: 1 2 0.50 mov z0.s, p0/m, s0
+# CHECK-NEXT: 2 5 1.00 mov z0.s, p0/m, w0
+# CHECK-NEXT: 1 2 0.50 mov z0.s, s0
+# CHECK-NEXT: 1 3 1.00 mov z0.s, w0
+# CHECK-NEXT: 1 2 0.50 mov z21.d, #-128
+# CHECK-NEXT: 1 2 0.50 mov z21.d, #-32768
+# CHECK-NEXT: 1 2 0.50 mov z21.d, #127
+# CHECK-NEXT: 1 2 0.50 mov z21.d, #32512
+# CHECK-NEXT: 1 2 0.50 mov z21.d, p0/z, #-128
+# CHECK-NEXT: 1 2 0.50 mov z21.d, p0/z, #-32768
+# CHECK-NEXT: 1 2 0.50 mov z21.d, p0/z, #127
+# CHECK-NEXT: 1 2 0.50 mov z21.d, p0/z, #32512
+# CHECK-NEXT: 1 2 0.50 mov z21.d, p15/m, #-128
+# CHECK-NEXT: 1 2 0.50 mov z21.d, p15/m, #-32768
+# CHECK-NEXT: 1 2 0.50 mov z21.h, #-128
+# CHECK-NEXT: 1 2 0.50 mov z21.h, #-32768
+# CHECK-NEXT: 1 2 0.50 mov z21.h, #127
+# CHECK-NEXT: 1 2 0.50 mov z21.h, #32512
+# CHECK-NEXT: 1 2 0.50 mov z21.h, p0/z, #-128
+# CHECK-NEXT: 1 2 0.50 mov z21.h, p0/z, #-32768
+# CHECK-NEXT: 1 2 0.50 mov z21.h, p0/z, #127
+# CHECK-NEXT: 1 2 0.50 mov z21.h, p0/z, #32512
+# CHECK-NEXT: 1 2 0.50 mov z21.h, p15/m, #-128
+# CHECK-NEXT: 1 2 0.50 mov z21.h, p15/m, #-32768
+# CHECK-NEXT: 1 2 0.50 mov z21.s, #-128
+# CHECK-NEXT: 1 2 0.50 mov z21.s, #-32768
+# CHECK-NEXT: 1 2 0.50 mov z21.s, #127
+# CHECK-NEXT: 1 2 0.50 mov z21.s, #32512
+# CHECK-NEXT: 1 2 0.50 mov z21.s, p0/z, #-128
+# CHECK-NEXT: 1 2 0.50 mov z21.s, p0/z, #-32768
+# CHECK-NEXT: 1 2 0.50 mov z21.s, p0/z, #127
+# CHECK-NEXT: 1 2 0.50 mov z21.s, p0/z, #32512
+# CHECK-NEXT: 1 2 0.50 mov z21.s, p15/m, #-128
+# CHECK-NEXT: 1 2 0.50 mov z21.s, p15/m, #-32768
+# CHECK-NEXT: 1 2 0.50 mov z31.b, p15/m, z31.b
+# CHECK-NEXT: 1 2 0.50 mov z31.b, p7/m, b31
+# CHECK-NEXT: 1 2 0.50 movprfx z31, z6
+# CHECK-NEXT: 2 5 1.00 mov z31.b, p7/m, wsp
+# CHECK-NEXT: 1 3 1.00 mov z31.b, wsp
+# CHECK-NEXT: 1 2 0.50 mov z31.b, z31.b[63]
+# CHECK-NEXT: 1 2 0.50 mov z31.d, p15/m, z31.d
+# CHECK-NEXT: 1 2 0.50 mov z31.d, p7/m, d31
+# CHECK-NEXT: 1 2 0.50 movprfx z31.d, p7/z, z6.d
+# CHECK-NEXT: 2 5 1.00 mov z31.d, p7/m, sp
+# CHECK-NEXT: 1 3 1.00 mov z31.d, sp
+# CHECK-NEXT: 1 2 0.50 mov z31.d, z0.d
+# CHECK-NEXT: 1 2 0.50 mov z31.d, z31.d[7]
+# CHECK-NEXT: 1 2 0.50 mov z31.h, p15/m, z31.h
+# CHECK-NEXT: 1 2 0.50 mov z31.h, p7/m, h31
+# CHECK-NEXT: 2 5 1.00 mov z31.h, p7/m, wsp
+# CHECK-NEXT: 1 3 1.00 mov z31.h, wsp
+# CHECK-NEXT: 1 2 0.50 mov z31.h, z31.h[31]
+# CHECK-NEXT: 1 2 0.50 mov z31.s, p15/m, z31.s
+# CHECK-NEXT: 1 2 0.50 mov z31.s, p7/m, s31
+# CHECK-NEXT: 2 5 1.00 mov z31.s, p7/m, wsp
+# CHECK-NEXT: 1 3 1.00 mov z31.s, wsp
+# CHECK-NEXT: 1 2 0.50 mov z31.s, z31.s[15]
+# CHECK-NEXT: 1 2 0.50 mov z5.b, #-1
+# CHECK-NEXT: 1 2 0.50 mov z5.b, #-128
+# CHECK-NEXT: 1 2 0.50 mov z5.b, #127
+# CHECK-NEXT: 1 2 0.50 mov z5.b, p0/z, #-1
+# CHECK-NEXT: 1 2 0.50 mov z5.b, p0/z, #-128
+# CHECK-NEXT: 1 2 0.50 mov z5.b, p0/z, #127
+# CHECK-NEXT: 1 2 0.50 mov z5.b, p15/m, #-128
+# CHECK-NEXT: 1 2 0.50 mov z5.d, #-6
+# CHECK-NEXT: 1 2 0.50 mov z5.h, #-6
+# CHECK-NEXT: 1 2 0.50 mov z5.q, z17.q[3]
+# CHECK-NEXT: 1 2 0.50 mov z5.s, #-6
+# CHECK-NEXT: 1 2 0.50 movs p0.b, p0.b
+# CHECK-NEXT: 1 2 0.50 movs p0.b, p0/z, p0.b
+# CHECK-NEXT: 1 2 0.50 movs p15.b, p15.b
+# CHECK-NEXT: 1 2 0.50 movs p15.b, p15/z, p15.b
+# CHECK-NEXT: 1 1 0.10 U mrs x3, ID_AA64ZFR0_EL1
+# CHECK-NEXT: 1 1 0.10 U mrs x3, ZCR_EL1
+# CHECK-NEXT: 1 1 0.10 U mrs x3, ZCR_EL12
+# CHECK-NEXT: 1 1 0.10 U mrs x3, ZCR_EL2
+# CHECK-NEXT: 1 1 0.10 U mrs x3, ZCR_EL3
+# CHECK-NEXT: 1 4 1.00 msb z0.b, p7/m, z1.b, z31.b
+# CHECK-NEXT: 1 5 2.00 msb z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: 1 4 1.00 msb z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: 1 4 1.00 msb z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: 1 1 0.10 U msr ZCR_EL1, x3
+# CHECK-NEXT: 1 1 0.10 U msr ZCR_EL12, x3
+# CHECK-NEXT: 1 1 0.10 U msr ZCR_EL2, x3
+# CHECK-NEXT: 1 1 0.10 U msr ZCR_EL3, x3
+# CHECK-NEXT: 1 4 1.00 mul z0.b, p7/m, z0.b, z31.b
+# CHECK-NEXT: 1 4 1.00 mul z0.b, z1.b, z2.b
+# CHECK-NEXT: 2 5 2.00 mul z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 2 5 2.00 mul z0.d, z1.d, z15.d[1]
+# CHECK-NEXT: 1 4 1.00 mul z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 1 4 1.00 mul z0.h, z1.h, z2.h
+# CHECK-NEXT: 1 4 1.00 mul z0.h, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 1.00 mul z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 4 1.00 mul z0.s, z1.s, z7.s[3]
+# CHECK-NEXT: 1 4 1.00 mul z29.s, z30.s, z31.s
+# CHECK-NEXT: 1 4 1.00 mul z31.b, z31.b, #-128
+# CHECK-NEXT: 1 4 1.00 mul z31.b, z31.b, #127
+# CHECK-NEXT: 2 5 2.00 mul z31.d, z31.d, #-128
+# CHECK-NEXT: 2 5 2.00 mul z31.d, z31.d, #127
+# CHECK-NEXT: 2 5 2.00 mul z31.d, z31.d, z31.d
+# CHECK-NEXT: 1 4 1.00 mul z31.h, z31.h, #-128
+# CHECK-NEXT: 1 4 1.00 mul z31.h, z31.h, #127
+# CHECK-NEXT: 1 4 1.00 mul z31.s, z31.s, #-128
+# CHECK-NEXT: 1 4 1.00 mul z31.s, z31.s, #127
+# CHECK-NEXT: 1 2 0.50 nand p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: 1 2 0.50 nand p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 1 2 0.50 nands p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: 1 2 0.50 nands p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 1 2 0.50 nbsl z0.d, z0.d, z1.d, z2.d
+# CHECK-NEXT: 1 2 0.50 neg z0.b, p0/m, z0.b
+# CHECK-NEXT: 1 2 0.50 neg z0.d, p0/m, z0.d
+# CHECK-NEXT: 1 2 0.50 neg z0.h, p0/m, z0.h
+# CHECK-NEXT: 1 2 0.50 neg z0.s, p0/m, z0.s
+# CHECK-NEXT: 1 2 0.50 neg z31.b, p7/m, z31.b
+# CHECK-NEXT: 1 2 0.50 neg z31.d, p7/m, z31.d
+# CHECK-NEXT: 1 2 0.50 neg z31.h, p7/m, z31.h
+# CHECK-NEXT: 1 2 0.50 neg z31.s, p7/m, z31.s
+# CHECK-NEXT: 2 3 1.00 nmatch p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: 2 3 1.00 nmatch p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: 2 2 1.00 nmatch p15.b, p7/z, z30.b, z31.b
+# CHECK-NEXT: 2 2 1.00 nmatch p15.h, p7/z, z30.h, z31.h
+# CHECK-NEXT: 1 2 0.50 nor p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: 1 2 0.50 nor p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 1 2 0.50 nors p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: 1 2 0.50 nors p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 1 2 0.50 not p0.b, p0/z, p0.b
+# CHECK-NEXT: 1 2 0.50 not p15.b, p15/z, p15.b
+# CHECK-NEXT: 1 2 0.50 not z31.b, p7/m, z31.b
+# CHECK-NEXT: 1 2 0.50 not z31.d, p7/m, z31.d
+# CHECK-NEXT: 1 2 0.50 not z31.h, p7/m, z31.h
+# CHECK-NEXT: 1 2 0.50 not z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 2 0.50 nots p0.b, p0/z, p0.b
+# CHECK-NEXT: 1 2 0.50 nots p15.b, p15/z, p15.b
+# CHECK-NEXT: 1 2 0.50 orn p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: 1 2 0.50 orn p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 1 2 0.50 orns p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: 1 2 0.50 orns p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: 1 2 0.50 orr p0.b, p0/z, p0.b, p1.b
+# CHECK-NEXT: 1 2 0.50 orr z0.d, z0.d, #0x6
+# CHECK-NEXT: 1 2 0.50 orr z0.d, z0.d, #0xfffffffffffffff9
+# CHECK-NEXT: 1 2 0.50 orr z0.s, z0.s, #0x6
+# CHECK-NEXT: 1 2 0.50 orr z0.s, z0.s, #0xfffffff9
+# CHECK-NEXT: 1 2 0.50 orr z23.d, z13.d, z8.d
+# CHECK-NEXT: 1 2 0.50 orr z23.h, z23.h, #0x6
+# CHECK-NEXT: 1 2 0.50 orr z23.h, z23.h, #0xfff9
+# CHECK-NEXT: 1 2 0.50 orr z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: 1 2 0.50 orr z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: 1 2 0.50 orr z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: 1 2 0.50 orr z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.50 orr z5.b, z5.b, #0x6
+# CHECK-NEXT: 1 2 0.50 orr z5.b, z5.b, #0xf9
+# CHECK-NEXT: 1 2 0.50 orrs p0.b, p0/z, p0.b, p1.b
+# CHECK-NEXT: 2 6 1.00 orv b0, p7, z31.b
+# CHECK-NEXT: 2 6 1.00 orv d0, p7, z31.d
+# CHECK-NEXT: 2 6 1.00 orv h0, p7, z31.h
+# CHECK-NEXT: 2 6 1.00 orv s0, p7, z31.s
+# CHECK-NEXT: 1 2 0.50 pfalse p15.b
+# CHECK-NEXT: 1 2 0.50 pfirst p0.b, p15, p0.b
+# CHECK-NEXT: 1 2 0.50 pfirst p15.b, p15, p15.b
+# CHECK-NEXT: 1 2 0.50 pmul z0.b, z1.b, z2.b
+# CHECK-NEXT: 1 2 0.50 pmul z29.b, z30.b, z31.b
+# CHECK-NEXT: 1 2 0.50 pmullb z0.h, z1.b, z2.b
+# CHECK-NEXT: 1 2 0.50 pmullb z29.q, z30.d, z31.d
+# CHECK-NEXT: 1 2 0.50 pmullb z31.d, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.50 pmullt z0.h, z1.b, z2.b
+# CHECK-NEXT: 1 2 0.50 pmullt z29.q, z30.d, z31.d
+# CHECK-NEXT: 1 2 0.50 pmullt z31.d, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.50 pnext p0.b, p15, p0.b
+# CHECK-NEXT: 1 2 0.50 pnext p0.d, p15, p0.d
+# CHECK-NEXT: 1 2 0.50 pnext p0.h, p15, p0.h
+# CHECK-NEXT: 1 2 0.50 pnext p0.s, p15, p0.s
+# CHECK-NEXT: 1 2 0.50 pnext p15.b, p15, p15.b
+# CHECK-NEXT: 1 4 0.33 * * U prfb #14, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfb #15, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfb #6, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfb #7, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfb #7, p3, [z13.s, #31]
+# CHECK-NEXT: 1 4 0.33 * * U prfb #7, p3, [z13.s]
+# CHECK-NEXT: 1 4 0.33 * * U prfb pldl1keep, p0, [x0, z0.d, uxtw]
+# CHECK-NEXT: 1 4 0.33 * * U prfb pldl1keep, p0, [x0, z0.d]
+# CHECK-NEXT: 1 4 0.33 * * U prfb pldl1keep, p0, [x0, z0.s, uxtw]
+# CHECK-NEXT: 1 4 0.33 * * U prfb pldl1keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfb pldl1strm, p0, [x0, #-32, mul vl]
+# CHECK-NEXT: 1 4 0.33 * * U prfb pldl1strm, p0, [x0, #31, mul vl]
+# CHECK-NEXT: 1 4 0.33 * * U prfb pldl1strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfb pldl2keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfb pldl2strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfb pldl3keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfb pldl3strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfb pldl3strm, p5, [x10, z21.d, sxtw]
+# CHECK-NEXT: 1 4 0.33 * * U prfb pldl3strm, p5, [x10, z21.s, uxtw]
+# CHECK-NEXT: 1 4 0.33 * * U prfb pldl3strm, p5, [z10.d, #31]
+# CHECK-NEXT: 1 4 0.33 * * U prfb pldl3strm, p5, [z10.d]
+# CHECK-NEXT: 1 4 0.33 * * U prfb pstl1keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfb pstl1strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfb pstl2keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfb pstl2strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfb pstl3keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfb pstl3strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfd #14, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfd #15, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfd #15, p7, [z31.d, #248]
+# CHECK-NEXT: 1 4 0.33 * * U prfd #15, p7, [z31.d]
+# CHECK-NEXT: 1 4 0.33 * * U prfd #15, p7, [z31.s, #248]
+# CHECK-NEXT: 1 4 0.33 * * U prfd #15, p7, [z31.s]
+# CHECK-NEXT: 1 4 0.33 * * U prfd #6, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfd #7, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfd pldl1keep, p0, [x0, z0.d, lsl #3]
+# CHECK-NEXT: 1 4 0.33 * * U prfd pldl1keep, p0, [x0, z0.d, sxtw #3]
+# CHECK-NEXT: 1 4 0.33 * * U prfd pldl1keep, p0, [x0, z0.d, uxtw #3]
+# CHECK-NEXT: 1 4 0.33 * * U prfd pldl1keep, p0, [x0, z0.s, sxtw #3]
+# CHECK-NEXT: 1 4 0.33 * * U prfd pldl1keep, p0, [x0, z0.s, uxtw #3]
+# CHECK-NEXT: 1 4 0.33 * * U prfd pldl1keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfd pldl1strm, p0, [x0, #-32, mul vl]
+# CHECK-NEXT: 1 4 0.33 * * U prfd pldl1strm, p0, [x0, #31, mul vl]
+# CHECK-NEXT: 1 4 0.33 * * U prfd pldl1strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfd pldl2keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfd pldl2strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfd pldl3keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfd pldl3strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfd pstl1keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfd pstl1strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfd pstl2keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfd pstl2strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfd pstl3keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfd pstl3strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfh #14, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfh #15, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfh #15, p7, [z31.d, #62]
+# CHECK-NEXT: 1 4 0.33 * * U prfh #15, p7, [z31.d]
+# CHECK-NEXT: 1 4 0.33 * * U prfh #15, p7, [z31.s, #62]
+# CHECK-NEXT: 1 4 0.33 * * U prfh #15, p7, [z31.s]
+# CHECK-NEXT: 1 4 0.33 * * U prfh #6, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfh #7, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfh pldl1keep, p0, [x0, z0.d, lsl #1]
+# CHECK-NEXT: 1 4 0.33 * * U prfh pldl1keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfh pldl1strm, p0, [x0, #-32, mul vl]
+# CHECK-NEXT: 1 4 0.33 * * U prfh pldl1strm, p0, [x0, #31, mul vl]
+# CHECK-NEXT: 1 4 0.33 * * U prfh pldl1strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfh pldl2keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfh pldl2strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfh pldl3keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfh pldl3strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfh pldl3strm, p5, [x10, z21.d, sxtw #1]
+# CHECK-NEXT: 1 4 0.33 * * U prfh pldl3strm, p5, [x10, z21.d, uxtw #1]
+# CHECK-NEXT: 1 4 0.33 * * U prfh pldl3strm, p5, [x10, z21.s, sxtw #1]
+# CHECK-NEXT: 1 4 0.33 * * U prfh pldl3strm, p5, [x10, z21.s, uxtw #1]
+# CHECK-NEXT: 1 4 0.33 * * U prfh pstl1keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfh pstl1strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfh pstl2keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfh pstl2strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfh pstl3keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfh pstl3strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfw #14, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfw #15, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfw #15, p7, [z31.d, #124]
+# CHECK-NEXT: 1 4 0.33 * * U prfw #15, p7, [z31.d]
+# CHECK-NEXT: 1 4 0.33 * * U prfw #15, p7, [z31.s, #124]
+# CHECK-NEXT: 1 4 0.33 * * U prfw #15, p7, [z31.s]
+# CHECK-NEXT: 1 4 0.33 * * U prfw #6, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfw #7, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfw #7, p3, [x13, z8.d, uxtw #2]
+# CHECK-NEXT: 1 4 0.33 * * U prfw pldl1keep, p0, [x0, z0.d, sxtw #2]
+# CHECK-NEXT: 1 4 0.33 * * U prfw pldl1keep, p0, [x0, z0.s, uxtw #2]
+# CHECK-NEXT: 1 4 0.33 * * U prfw pldl1keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfw pldl1strm, p0, [x0, #-32, mul vl]
+# CHECK-NEXT: 1 4 0.33 * * U prfw pldl1strm, p0, [x0, #31, mul vl]
+# CHECK-NEXT: 1 4 0.33 * * U prfw pldl1strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfw pldl2keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfw pldl2strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfw pldl3keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfw pldl3strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfw pldl3strm, p5, [x10, z21.d, lsl #2]
+# CHECK-NEXT: 1 4 0.33 * * U prfw pldl3strm, p5, [x10, z21.s, sxtw #2]
+# CHECK-NEXT: 1 4 0.33 * * U prfw pstl1keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfw pstl1strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfw pstl2keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfw pstl2strm, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfw pstl3keep, p0, [x0]
+# CHECK-NEXT: 1 4 0.33 * * U prfw pstl3strm, p0, [x0]
+# CHECK-NEXT: 1 1 0.50 ptest p15, p0.b
+# CHECK-NEXT: 1 1 0.50 ptest p15, p15.b
+# CHECK-NEXT: 1 2 0.50 ptrue p0.b, pow2
+# CHECK-NEXT: 1 2 0.50 ptrue p0.d, pow2
+# CHECK-NEXT: 1 2 0.50 ptrue p0.h, pow2
+# CHECK-NEXT: 1 2 0.50 ptrue p0.s, pow2
+# CHECK-NEXT: 1 2 0.50 ptrue p15.b
+# CHECK-NEXT: 1 2 0.50 ptrue p15.d
+# CHECK-NEXT: 1 2 0.50 ptrue p15.h
+# CHECK-NEXT: 1 2 0.50 ptrue p15.s
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, #14
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, #15
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, #16
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, #17
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, #18
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, #19
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, #20
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, #21
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, #22
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, #23
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, #24
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, #25
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, #26
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, #27
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, #28
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, mul3
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, mul4
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, vl1
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, vl128
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, vl16
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, vl2
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, vl256
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, vl3
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, vl32
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, vl4
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, vl5
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, vl6
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, vl64
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, vl7
+# CHECK-NEXT: 1 2 0.50 ptrue p7.s, vl8
+# CHECK-NEXT: 1 2 0.50 ptrues p0.b, pow2
+# CHECK-NEXT: 1 2 0.50 ptrues p0.d, pow2
+# CHECK-NEXT: 1 2 0.50 ptrues p0.h, pow2
+# CHECK-NEXT: 1 2 0.50 ptrues p0.s, pow2
+# CHECK-NEXT: 1 2 0.50 ptrues p15.b
+# CHECK-NEXT: 1 2 0.50 ptrues p15.d
+# CHECK-NEXT: 1 2 0.50 ptrues p15.h
+# CHECK-NEXT: 1 2 0.50 ptrues p15.s
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, #14
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, #15
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, #16
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, #17
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, #18
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, #19
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, #20
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, #21
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, #22
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, #23
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, #24
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, #25
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, #26
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, #27
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, #28
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, mul3
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, mul4
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, vl1
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, vl128
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, vl16
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, vl2
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, vl256
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, vl3
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, vl32
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, vl4
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, vl5
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, vl6
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, vl64
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, vl7
+# CHECK-NEXT: 1 2 0.50 ptrues p7.s, vl8
+# CHECK-NEXT: 1 2 0.50 punpkhi p0.h, p0.b
+# CHECK-NEXT: 1 2 0.50 punpkhi p15.h, p15.b
+# CHECK-NEXT: 1 2 0.50 punpklo p0.h, p0.b
+# CHECK-NEXT: 1 2 0.50 punpklo p15.h, p15.b
+# CHECK-NEXT: 1 2 0.50 raddhnb z0.b, z1.h, z31.h
+# CHECK-NEXT: 1 2 0.50 raddhnb z0.h, z1.s, z31.s
+# CHECK-NEXT: 1 2 0.50 raddhnb z0.s, z1.d, z31.d
+# CHECK-NEXT: 1 2 0.50 raddhnt z0.b, z1.h, z31.h
+# CHECK-NEXT: 1 2 0.50 raddhnt z0.h, z1.s, z31.s
+# CHECK-NEXT: 1 2 0.50 raddhnt z0.s, z1.d, z31.d
+# CHECK-NEXT: 1 2 0.50 rax1 z0.d, z1.d, z31.d
+# CHECK-NEXT: 1 2 0.50 rbit z0.b, p7/m, z31.b
+# CHECK-NEXT: 1 2 0.50 rbit z0.d, p7/m, z31.d
+# CHECK-NEXT: 1 2 0.50 rbit z0.h, p7/m, z31.h
+# CHECK-NEXT: 1 2 0.50 rbit z0.s, p7/m, z31.s
+# CHECK-NEXT: 1 2 1.00 * U rdffr p0.b
+# CHECK-NEXT: 2 4 1.00 * U rdffr p0.b, p0/z
+# CHECK-NEXT: 1 2 1.00 * U rdffr p15.b
+# CHECK-NEXT: 2 4 1.00 * U rdffr p15.b, p15/z
+# CHECK-NEXT: 2 4 1.00 U rdffrs p0.b, p0/z
+# CHECK-NEXT: 2 4 1.00 U rdffrs p15.b, p15/z
+# CHECK-NEXT: 1 2 0.50 rdvl x0, #0
+# CHECK-NEXT: 1 2 0.50 rdvl x21, #-32
+# CHECK-NEXT: 1 2 0.50 rdvl x23, #31
+# CHECK-NEXT: 1 2 0.50 rdvl xzr, #-1
+# CHECK-NEXT: 1 2 0.50 rev p0.b, p1.b
+# CHECK-NEXT: 1 2 0.50 rev p0.d, p1.d
+# CHECK-NEXT: 1 2 0.50 rev p0.h, p1.h
+# CHECK-NEXT: 1 2 0.50 rev p0.s, p1.s
+# CHECK-NEXT: 1 2 0.50 rev z0.b, z31.b
+# CHECK-NEXT: 1 2 0.50 rev z0.d, z31.d
+# CHECK-NEXT: 1 2 0.50 rev z0.h, z31.h
+# CHECK-NEXT: 1 2 0.50 rev z0.s, z31.s
+# CHECK-NEXT: 1 2 0.50 revb z0.d, p7/m, z31.d
+# CHECK-NEXT: 1 2 0.50 revb z0.h, p7/m, z31.h
+# CHECK-NEXT: 1 2 0.50 revb z0.s, p7/m, z31.s
+# CHECK-NEXT: 1 2 0.50 revh z0.d, p7/m, z31.d
+# CHECK-NEXT: 1 2 0.50 revh z0.s, p7/m, z31.s
+# CHECK-NEXT: 1 2 0.50 revw z0.d, p7/m, z31.d
+# CHECK-NEXT: 1 4 0.50 rshrnb z0.b, z0.h, #1
+# CHECK-NEXT: 1 4 0.50 rshrnb z0.h, z0.s, #1
+# CHECK-NEXT: 1 4 0.50 rshrnb z0.s, z0.d, #1
+# CHECK-NEXT: 1 4 0.50 rshrnb z31.b, z31.h, #8
+# CHECK-NEXT: 1 4 0.50 rshrnb z31.h, z31.s, #16
+# CHECK-NEXT: 1 4 0.50 rshrnb z31.s, z31.d, #32
+# CHECK-NEXT: 1 4 0.50 rshrnt z0.b, z0.h, #1
+# CHECK-NEXT: 1 4 0.50 rshrnt z0.h, z0.s, #1
+# CHECK-NEXT: 1 4 0.50 rshrnt z0.s, z0.d, #1
+# CHECK-NEXT: 1 4 0.50 rshrnt z31.b, z31.h, #8
+# CHECK-NEXT: 1 4 0.50 rshrnt z31.h, z31.s, #16
+# CHECK-NEXT: 1 4 0.50 rshrnt z31.s, z31.d, #32
+# CHECK-NEXT: 1 2 0.50 rsubhnb z0.b, z1.h, z31.h
+# CHECK-NEXT: 1 2 0.50 rsubhnb z0.h, z1.s, z31.s
+# CHECK-NEXT: 1 2 0.50 rsubhnb z0.s, z1.d, z31.d
+# CHECK-NEXT: 1 2 0.50 rsubhnt z0.b, z1.h, z31.h
+# CHECK-NEXT: 1 2 0.50 rsubhnt z0.h, z1.s, z31.s
+# CHECK-NEXT: 1 2 0.50 rsubhnt z0.s, z1.d, z31.d
+# CHECK-NEXT: 1 4 0.50 saba z0.b, z1.b, z31.b
+# CHECK-NEXT: 1 4 0.50 saba z0.d, z1.d, z31.d
+# CHECK-NEXT: 1 4 0.50 saba z0.h, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.50 saba z0.s, z1.s, z31.s
+# CHECK-NEXT: 1 4 0.50 sabalb z0.d, z1.s, z31.s
+# CHECK-NEXT: 1 4 0.50 sabalb z0.h, z1.b, z31.b
+# CHECK-NEXT: 1 4 0.50 sabalb z0.s, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.50 sabalt z0.d, z1.s, z31.s
+# CHECK-NEXT: 1 4 0.50 sabalt z0.h, z1.b, z31.b
+# CHECK-NEXT: 1 4 0.50 sabalt z0.s, z1.h, z31.h
+# CHECK-NEXT: 1 2 0.50 sabd z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: 1 2 0.50 sabd z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: 1 2 0.50 sabd z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: 1 2 0.50 sabd z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.50 sabdlb z0.h, z1.b, z2.b
+# CHECK-NEXT: 1 2 0.50 sabdlb z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 2 0.50 sabdlb z31.d, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.50 sabdlt z0.h, z1.b, z2.b
+# CHECK-NEXT: 1 2 0.50 sabdlt z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 2 0.50 sabdlt z31.d, z31.s, z31.s
+# CHECK-NEXT: 1 4 0.50 sadalp z0.h, p0/m, z1.b
+# CHECK-NEXT: 1 4 0.50 sadalp z29.s, p0/m, z30.h
+# CHECK-NEXT: 1 4 0.50 sadalp z30.d, p7/m, z31.s
+# CHECK-NEXT: 1 2 0.50 saddlb z0.h, z1.b, z2.b
+# CHECK-NEXT: 1 2 0.50 saddlb z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 2 0.50 saddlb z31.d, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.50 saddlbt z0.d, z1.s, z31.s
+# CHECK-NEXT: 1 2 0.50 saddlbt z0.h, z1.b, z31.b
+# CHECK-NEXT: 1 2 0.50 saddlbt z0.s, z1.h, z31.h
+# CHECK-NEXT: 1 2 0.50 saddlt z0.h, z1.b, z2.b
+# CHECK-NEXT: 1 2 0.50 saddlt z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 2 0.50 saddlt z31.d, z31.s, z31.s
+# CHECK-NEXT: 6 9 4.00 saddv d0, p7, z31.b
+# CHECK-NEXT: 4 8 2.00 saddv d0, p7, z31.h
+# CHECK-NEXT: 4 6 2.00 saddv d0, p7, z31.s
+# CHECK-NEXT: 1 2 0.50 saddwb z0.h, z1.h, z2.b
+# CHECK-NEXT: 1 2 0.50 saddwb z29.s, z30.s, z31.h
+# CHECK-NEXT: 1 2 0.50 saddwb z31.d, z31.d, z31.s
+# CHECK-NEXT: 1 2 0.50 saddwt z0.h, z1.h, z2.b
+# CHECK-NEXT: 1 2 0.50 saddwt z29.s, z30.s, z31.h
+# CHECK-NEXT: 1 2 0.50 saddwt z31.d, z31.d, z31.s
+# CHECK-NEXT: 1 2 0.50 sbclb z0.d, z1.d, z31.d
+# CHECK-NEXT: 1 2 0.50 sbclb z0.s, z1.s, z31.s
+# CHECK-NEXT: 1 2 0.50 sbclt z0.d, z1.d, z31.d
+# CHECK-NEXT: 1 2 0.50 sbclt z0.s, z1.s, z31.s
+# CHECK-NEXT: 1 3 1.00 scvtf z0.d, p0/m, z0.d
+# CHECK-NEXT: 1 3 1.00 scvtf z0.d, p0/m, z0.s
+# CHECK-NEXT: 1 3 1.00 scvtf z0.h, p0/m, z0.d
+# CHECK-NEXT: 4 6 4.00 scvtf z0.h, p0/m, z0.h
+# CHECK-NEXT: 2 4 2.00 scvtf z0.h, p0/m, z0.s
+# CHECK-NEXT: 1 3 1.00 scvtf z0.s, p0/m, z0.d
+# CHECK-NEXT: 2 4 2.00 scvtf z0.s, p0/m, z0.s
+# CHECK-NEXT: 1 20 20.00 sdiv z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 12 11.00 sdiv z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 20 20.00 sdivr z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 12 11.00 sdivr z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 3 1.00 sdot z0.d, z1.h, z15.h[1]
+# CHECK-NEXT: 1 3 1.00 sdot z0.d, z1.h, z31.h
+# CHECK-NEXT: 1 3 0.50 sdot z0.s, z1.b, z31.b
+# CHECK-NEXT: 1 3 0.50 sdot z0.s, z1.b, z7.b[3]
+# CHECK-NEXT: 1 1 0.50 sel p0.b, p1, p2.b, p3.b
+# CHECK-NEXT: 1 2 0.50 sel z23.b, p11, z13.b, z8.b
+# CHECK-NEXT: 1 2 0.50 sel z23.d, p11, z13.d, z8.d
+# CHECK-NEXT: 1 2 0.50 sel z23.h, p11, z13.h, z8.h
+# CHECK-NEXT: 1 2 0.50 sel z23.s, p11, z13.s, z8.s
+# CHECK-NEXT: 1 2 1.00 * U setffr
+# CHECK-NEXT: 1 2 0.50 shadd z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 2 0.50 shadd z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.50 shadd z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.50 shadd z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 2 0.50 shrnb z0.b, z0.h, #1
+# CHECK-NEXT: 1 2 0.50 shrnb z0.h, z0.s, #1
+# CHECK-NEXT: 1 2 0.50 shrnb z0.s, z0.d, #1
+# CHECK-NEXT: 1 2 0.50 shrnb z31.b, z31.h, #8
+# CHECK-NEXT: 1 2 0.50 shrnb z31.h, z31.s, #16
+# CHECK-NEXT: 1 2 0.50 shrnb z31.s, z31.d, #32
+# CHECK-NEXT: 1 2 0.50 shrnt z0.b, z0.h, #1
+# CHECK-NEXT: 1 2 0.50 shrnt z0.h, z0.s, #1
+# CHECK-NEXT: 1 2 0.50 shrnt z0.s, z0.d, #1
+# CHECK-NEXT: 1 2 0.50 shrnt z31.b, z31.h, #8
+# CHECK-NEXT: 1 2 0.50 shrnt z31.h, z31.s, #16
+# CHECK-NEXT: 1 2 0.50 shrnt z31.s, z31.d, #32
+# CHECK-NEXT: 1 2 0.50 shsub z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 2 0.50 shsub z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.50 shsub z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.50 shsub z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 2 0.50 shsubr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 2 0.50 shsubr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.50 shsubr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.50 shsubr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 2 0.50 sli z0.b, z0.b, #0
+# CHECK-NEXT: 1 2 0.50 sli z0.d, z0.d, #0
+# CHECK-NEXT: 1 2 0.50 sli z0.h, z0.h, #0
+# CHECK-NEXT: 1 2 0.50 sli z0.s, z0.s, #0
+# CHECK-NEXT: 1 2 0.50 sli z31.b, z31.b, #7
+# CHECK-NEXT: 1 2 0.50 sli z31.d, z31.d, #63
+# CHECK-NEXT: 1 2 0.50 sli z31.h, z31.h, #15
+# CHECK-NEXT: 1 2 0.50 sli z31.s, z31.s, #31
+# CHECK-NEXT: 1 4 1.00 sm4e z0.s, z0.s, z31.s
+# CHECK-NEXT: 1 4 1.00 sm4ekey z0.s, z1.s, z31.s
+# CHECK-NEXT: 1 2 0.50 smax z0.b, z0.b, #-128
+# CHECK-NEXT: 1 2 0.50 smax z0.d, z0.d, #-128
+# CHECK-NEXT: 1 2 0.50 smax z0.h, z0.h, #-128
+# CHECK-NEXT: 1 2 0.50 smax z0.s, z0.s, #-128
+# CHECK-NEXT: 1 2 0.50 smax z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: 1 2 0.50 smax z31.b, z31.b, #127
+# CHECK-NEXT: 1 2 0.50 smax z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: 1 2 0.50 smax z31.d, z31.d, #127
+# CHECK-NEXT: 1 2 0.50 smax z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: 1 2 0.50 smax z31.h, z31.h, #127
+# CHECK-NEXT: 1 2 0.50 smax z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.50 smax z31.s, z31.s, #127
+# CHECK-NEXT: 1 2 0.50 smaxp z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 2 0.50 smaxp z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.50 smaxp z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.50 smaxp z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 6 9 4.00 smaxv b0, p7, z31.b
+# CHECK-NEXT: 2 4 1.00 smaxv d0, p7, z31.d
+# CHECK-NEXT: 4 8 2.00 smaxv h0, p7, z31.h
+# CHECK-NEXT: 4 6 2.00 smaxv s0, p7, z31.s
+# CHECK-NEXT: 1 2 0.50 smin z0.b, z0.b, #-128
+# CHECK-NEXT: 1 2 0.50 smin z0.d, z0.d, #-128
+# CHECK-NEXT: 1 2 0.50 smin z0.h, z0.h, #-128
+# CHECK-NEXT: 1 2 0.50 smin z0.s, z0.s, #-128
+# CHECK-NEXT: 1 2 0.50 smin z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: 1 2 0.50 smin z31.b, z31.b, #127
+# CHECK-NEXT: 1 2 0.50 smin z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: 1 2 0.50 smin z31.d, z31.d, #127
+# CHECK-NEXT: 1 2 0.50 smin z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: 1 2 0.50 smin z31.h, z31.h, #127
+# CHECK-NEXT: 1 2 0.50 smin z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.50 smin z31.s, z31.s, #127
+# CHECK-NEXT: 1 2 0.50 sminp z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 2 0.50 sminp z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.50 sminp z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.50 sminp z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 6 9 4.00 sminv b0, p7, z31.b
+# CHECK-NEXT: 2 4 1.00 sminv d0, p7, z31.d
+# CHECK-NEXT: 4 8 2.00 sminv h0, p7, z31.h
+# CHECK-NEXT: 4 6 2.00 sminv s0, p7, z31.s
+# CHECK-NEXT: 1 4 1.00 smlalb z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: 1 4 1.00 smlalb z0.d, z1.s, z31.s
+# CHECK-NEXT: 1 4 1.00 smlalb z0.h, z1.b, z31.b
+# CHECK-NEXT: 1 4 1.00 smlalb z0.s, z1.h, z31.h
+# CHECK-NEXT: 1 4 1.00 smlalb z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 1.00 smlalt z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: 1 4 1.00 smlalt z0.d, z1.s, z31.s
+# CHECK-NEXT: 1 4 1.00 smlalt z0.h, z1.b, z31.b
+# CHECK-NEXT: 1 4 1.00 smlalt z0.s, z1.h, z31.h
+# CHECK-NEXT: 1 4 1.00 smlalt z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 1.00 smlslb z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: 1 4 1.00 smlslb z0.d, z1.s, z31.s
+# CHECK-NEXT: 1 4 1.00 smlslb z0.h, z1.b, z31.b
+# CHECK-NEXT: 1 4 1.00 smlslb z0.s, z1.h, z31.h
+# CHECK-NEXT: 1 4 1.00 smlslb z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 1.00 smlslt z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: 1 4 1.00 smlslt z0.d, z1.s, z31.s
+# CHECK-NEXT: 1 4 1.00 smlslt z0.h, z1.b, z31.b
+# CHECK-NEXT: 1 4 1.00 smlslt z0.s, z1.h, z31.h
+# CHECK-NEXT: 1 4 1.00 smlslt z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: 1 3 0.50 smmla z0.s, z1.b, z2.b
+# CHECK-NEXT: 1 4 1.00 smulh z0.b, p7/m, z0.b, z31.b
+# CHECK-NEXT: 1 4 1.00 smulh z0.b, z1.b, z2.b
+# CHECK-NEXT: 2 5 2.00 smulh z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 4 1.00 smulh z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 1 4 1.00 smulh z0.h, z1.h, z2.h
+# CHECK-NEXT: 1 4 1.00 smulh z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 4 1.00 smulh z29.s, z30.s, z31.s
+# CHECK-NEXT: 2 5 2.00 smulh z31.d, z31.d, z31.d
+# CHECK-NEXT: 1 4 1.00 smullb z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: 1 4 1.00 smullb z0.h, z1.b, z2.b
+# CHECK-NEXT: 1 4 1.00 smullb z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 1.00 smullb z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 4 1.00 smullb z31.d, z31.s, z31.s
+# CHECK-NEXT: 1 4 1.00 smullt z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: 1 4 1.00 smullt z0.h, z1.b, z2.b
+# CHECK-NEXT: 1 4 1.00 smullt z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 1.00 smullt z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 4 1.00 smullt z31.d, z31.s, z31.s
+# CHECK-NEXT: 1 3 1.00 splice z29.b, p7, { z30.b, z31.b }
+# CHECK-NEXT: 1 3 1.00 splice z29.d, p7, { z30.d, z31.d }
+# CHECK-NEXT: 1 3 1.00 splice z29.h, p7, { z30.h, z31.h }
+# CHECK-NEXT: 1 3 1.00 splice z29.s, p7, { z30.s, z31.s }
+# CHECK-NEXT: 1 3 1.00 splice z31.b, p7, z31.b, z31.b
+# CHECK-NEXT: 1 3 1.00 splice z31.d, p7, z31.d, z31.d
+# CHECK-NEXT: 1 3 1.00 splice z31.h, p7, z31.h, z31.h
+# CHECK-NEXT: 1 3 1.00 splice z31.s, p7, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.50 sqabs z31.b, p7/m, z31.b
+# CHECK-NEXT: 1 2 0.50 sqabs z31.d, p7/m, z31.d
+# CHECK-NEXT: 1 2 0.50 sqabs z31.h, p7/m, z31.h
+# CHECK-NEXT: 1 2 0.50 sqabs z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 2 0.50 sqadd z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 2 0.50 sqadd z0.b, z0.b, #0
+# CHECK-NEXT: 1 2 0.50 sqadd z0.b, z0.b, z0.b
+# CHECK-NEXT: 1 2 0.50 sqadd z0.d, z0.d, #0
+# CHECK-NEXT: 1 2 0.50 sqadd z0.d, z0.d, #0, lsl #8
+# CHECK-NEXT: 1 2 0.50 sqadd z0.d, z0.d, z0.d
+# CHECK-NEXT: 1 2 0.50 sqadd z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.50 sqadd z0.h, z0.h, #0
+# CHECK-NEXT: 1 2 0.50 sqadd z0.h, z0.h, #0, lsl #8
+# CHECK-NEXT: 1 2 0.50 sqadd z0.h, z0.h, z0.h
+# CHECK-NEXT: 1 2 0.50 sqadd z0.s, z0.s, #0
+# CHECK-NEXT: 1 2 0.50 sqadd z0.s, z0.s, #0, lsl #8
+# CHECK-NEXT: 1 2 0.50 sqadd z0.s, z0.s, z0.s
+# CHECK-NEXT: 1 2 0.50 sqadd z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.50 sqadd z31.b, z31.b, #255
+# CHECK-NEXT: 1 2 0.50 sqadd z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 2 0.50 sqadd z31.d, z31.d, #65280
+# CHECK-NEXT: 1 2 0.50 sqadd z31.h, z31.h, #65280
+# CHECK-NEXT: 1 2 0.50 sqadd z31.s, z31.s, #65280
+# CHECK-NEXT: 1 2 0.50 sqcadd z0.b, z0.b, z0.b, #90
+# CHECK-NEXT: 1 2 0.50 sqcadd z0.d, z0.d, z0.d, #90
+# CHECK-NEXT: 1 2 0.50 sqcadd z0.h, z0.h, z0.h, #90
+# CHECK-NEXT: 1 2 0.50 sqcadd z0.s, z0.s, z0.s, #90
+# CHECK-NEXT: 1 2 0.50 sqcadd z31.b, z31.b, z31.b, #270
+# CHECK-NEXT: 1 2 0.50 sqcadd z31.d, z31.d, z31.d, #270
+# CHECK-NEXT: 1 2 0.50 sqcadd z31.h, z31.h, z31.h, #270
+# CHECK-NEXT: 1 2 0.50 sqcadd z31.s, z31.s, z31.s, #270
+# CHECK-NEXT: 1 2 0.50 sqdecb x0
+# CHECK-NEXT: 1 2 0.50 sqdecb x0, #14
+# CHECK-NEXT: 1 2 0.50 sqdecb x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqdecb x0, pow2
+# CHECK-NEXT: 1 2 0.50 sqdecb x0, vl1
+# CHECK-NEXT: 1 2 0.50 sqdecb x0, w0
+# CHECK-NEXT: 1 2 0.50 sqdecb x0, w0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqdecb x0, w0, pow2
+# CHECK-NEXT: 1 2 0.50 sqdecb x0, w0, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 sqdecd x0
+# CHECK-NEXT: 1 2 0.50 sqdecd x0, #14
+# CHECK-NEXT: 1 2 0.50 sqdecd x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqdecd x0, pow2
+# CHECK-NEXT: 1 2 0.50 sqdecd x0, vl1
+# CHECK-NEXT: 1 2 0.50 sqdecd x0, w0
+# CHECK-NEXT: 1 2 0.50 sqdecd x0, w0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqdecd x0, w0, pow2
+# CHECK-NEXT: 1 2 0.50 sqdecd x0, w0, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 sqdecd z0.d
+# CHECK-NEXT: 1 2 0.50 sqdecd z0.d, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqdecd z0.d, pow2
+# CHECK-NEXT: 1 2 0.50 sqdecd z0.d, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 sqdech x0
+# CHECK-NEXT: 1 2 0.50 sqdech x0, #14
+# CHECK-NEXT: 1 2 0.50 sqdech x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqdech x0, pow2
+# CHECK-NEXT: 1 2 0.50 sqdech x0, vl1
+# CHECK-NEXT: 1 2 0.50 sqdech x0, w0
+# CHECK-NEXT: 1 2 0.50 sqdech x0, w0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqdech x0, w0, pow2
+# CHECK-NEXT: 1 2 0.50 sqdech x0, w0, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 sqdech z0.h
+# CHECK-NEXT: 1 2 0.50 sqdech z0.h, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqdech z0.h, pow2
+# CHECK-NEXT: 1 2 0.50 sqdech z0.h, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 sqdecp x0, p0.b
+# CHECK-NEXT: 1 2 0.50 sqdecp x0, p0.d
+# CHECK-NEXT: 1 2 0.50 sqdecp x0, p0.h
+# CHECK-NEXT: 1 2 0.50 sqdecp x0, p0.s
+# CHECK-NEXT: 1 2 0.50 sqdecp xzr, p15.b, wzr
+# CHECK-NEXT: 1 2 0.50 sqdecp xzr, p15.d, wzr
+# CHECK-NEXT: 1 2 0.50 sqdecp xzr, p15.h, wzr
+# CHECK-NEXT: 1 2 0.50 sqdecp xzr, p15.s, wzr
+# CHECK-NEXT: 3 7 1.00 sqdecp z0.d, p0.d
+# CHECK-NEXT: 3 7 1.00 sqdecp z0.h, p0.h
+# CHECK-NEXT: 3 7 1.00 sqdecp z0.s, p0.s
+# CHECK-NEXT: 1 2 0.50 sqdecw x0
+# CHECK-NEXT: 1 2 0.50 sqdecw x0, #14
+# CHECK-NEXT: 1 2 0.50 sqdecw x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqdecw x0, pow2
+# CHECK-NEXT: 1 2 0.50 sqdecw x0, vl1
+# CHECK-NEXT: 1 2 0.50 sqdecw x0, w0
+# CHECK-NEXT: 1 2 0.50 sqdecw x0, w0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqdecw x0, w0, pow2
+# CHECK-NEXT: 1 2 0.50 sqdecw x0, w0, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 sqdecw z0.s
+# CHECK-NEXT: 1 2 0.50 sqdecw z0.s, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqdecw z0.s, pow2
+# CHECK-NEXT: 1 2 0.50 sqdecw z0.s, pow2, mul #16
+# CHECK-NEXT: 1 4 1.00 sqdmlalb z0.d, z1.s, z15.s[3]
+# CHECK-NEXT: 1 4 1.00 sqdmlalb z0.d, z1.s, z31.s
+# CHECK-NEXT: 1 4 1.00 sqdmlalb z0.h, z1.b, z31.b
+# CHECK-NEXT: 1 4 1.00 sqdmlalb z0.s, z1.h, z31.h
+# CHECK-NEXT: 1 4 1.00 sqdmlalb z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 1.00 sqdmlalbt z0.d, z1.s, z31.s
+# CHECK-NEXT: 1 4 1.00 sqdmlalbt z0.h, z1.b, z31.b
+# CHECK-NEXT: 1 4 1.00 sqdmlalbt z0.s, z1.h, z31.h
+# CHECK-NEXT: 1 4 1.00 sqdmlalt z0.d, z1.s, z15.s[3]
+# CHECK-NEXT: 1 4 1.00 sqdmlalt z0.d, z1.s, z31.s
+# CHECK-NEXT: 1 4 1.00 sqdmlalt z0.h, z1.b, z31.b
+# CHECK-NEXT: 1 4 1.00 sqdmlalt z0.s, z1.h, z31.h
+# CHECK-NEXT: 1 4 1.00 sqdmlalt z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 1.00 sqdmlslb z0.d, z1.s, z15.s[3]
+# CHECK-NEXT: 1 4 1.00 sqdmlslb z0.d, z1.s, z31.s
+# CHECK-NEXT: 1 4 1.00 sqdmlslb z0.h, z1.b, z31.b
+# CHECK-NEXT: 1 4 1.00 sqdmlslb z0.s, z1.h, z31.h
+# CHECK-NEXT: 1 4 1.00 sqdmlslb z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 1.00 sqdmlslbt z0.d, z1.s, z31.s
+# CHECK-NEXT: 1 4 1.00 sqdmlslbt z0.h, z1.b, z31.b
+# CHECK-NEXT: 1 4 1.00 sqdmlslbt z0.s, z1.h, z31.h
+# CHECK-NEXT: 1 4 1.00 sqdmlslt z0.d, z1.s, z15.s[3]
+# CHECK-NEXT: 1 4 1.00 sqdmlslt z0.d, z1.s, z31.s
+# CHECK-NEXT: 1 4 1.00 sqdmlslt z0.h, z1.b, z31.b
+# CHECK-NEXT: 1 4 1.00 sqdmlslt z0.s, z1.h, z31.h
+# CHECK-NEXT: 1 4 1.00 sqdmlslt z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 1.00 sqdmulh z0.b, z1.b, z2.b
+# CHECK-NEXT: 2 5 2.00 sqdmulh z0.d, z1.d, z15.d[1]
+# CHECK-NEXT: 1 4 1.00 sqdmulh z0.h, z1.h, z2.h
+# CHECK-NEXT: 1 4 1.00 sqdmulh z0.h, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 1.00 sqdmulh z0.s, z1.s, z7.s[3]
+# CHECK-NEXT: 1 4 1.00 sqdmulh z29.s, z30.s, z31.s
+# CHECK-NEXT: 2 5 2.00 sqdmulh z31.d, z31.d, z31.d
+# CHECK-NEXT: 1 4 1.00 sqdmullb z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: 1 4 1.00 sqdmullb z0.h, z1.b, z2.b
+# CHECK-NEXT: 1 4 1.00 sqdmullb z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 1.00 sqdmullb z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 4 1.00 sqdmullb z31.d, z31.s, z31.s
+# CHECK-NEXT: 1 4 1.00 sqdmullt z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: 1 4 1.00 sqdmullt z0.h, z1.b, z2.b
+# CHECK-NEXT: 1 4 1.00 sqdmullt z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 1.00 sqdmullt z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 4 1.00 sqdmullt z31.d, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.50 sqincb x0
+# CHECK-NEXT: 1 2 0.50 sqincb x0, #14
+# CHECK-NEXT: 1 2 0.50 sqincb x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqincb x0, pow2
+# CHECK-NEXT: 1 2 0.50 sqincb x0, vl1
+# CHECK-NEXT: 1 2 0.50 sqincb x0, w0
+# CHECK-NEXT: 1 2 0.50 sqincb x0, w0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqincb x0, w0, pow2
+# CHECK-NEXT: 1 2 0.50 sqincb x0, w0, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 sqincd x0
+# CHECK-NEXT: 1 2 0.50 sqincd x0, #14
+# CHECK-NEXT: 1 2 0.50 sqincd x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqincd x0, pow2
+# CHECK-NEXT: 1 2 0.50 sqincd x0, vl1
+# CHECK-NEXT: 1 2 0.50 sqincd x0, w0
+# CHECK-NEXT: 1 2 0.50 sqincd x0, w0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqincd x0, w0, pow2
+# CHECK-NEXT: 1 2 0.50 sqincd x0, w0, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 sqincd z0.d
+# CHECK-NEXT: 1 2 0.50 sqincd z0.d, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqincd z0.d, pow2
+# CHECK-NEXT: 1 2 0.50 sqincd z0.d, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 sqinch x0
+# CHECK-NEXT: 1 2 0.50 sqinch x0, #14
+# CHECK-NEXT: 1 2 0.50 sqinch x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqinch x0, pow2
+# CHECK-NEXT: 1 2 0.50 sqinch x0, vl1
+# CHECK-NEXT: 1 2 0.50 sqinch x0, w0
+# CHECK-NEXT: 1 2 0.50 sqinch x0, w0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqinch x0, w0, pow2
+# CHECK-NEXT: 1 2 0.50 sqinch x0, w0, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 sqinch z0.h
+# CHECK-NEXT: 1 2 0.50 sqinch z0.h, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqinch z0.h, pow2
+# CHECK-NEXT: 1 2 0.50 sqinch z0.h, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 sqincp x0, p0.b
+# CHECK-NEXT: 1 2 0.50 sqincp x0, p0.d
+# CHECK-NEXT: 1 2 0.50 sqincp x0, p0.h
+# CHECK-NEXT: 1 2 0.50 sqincp x0, p0.s
+# CHECK-NEXT: 1 2 0.50 sqincp xzr, p15.b, wzr
+# CHECK-NEXT: 1 2 0.50 sqincp xzr, p15.d, wzr
+# CHECK-NEXT: 1 2 0.50 sqincp xzr, p15.h, wzr
+# CHECK-NEXT: 1 2 0.50 sqincp xzr, p15.s, wzr
+# CHECK-NEXT: 3 7 1.00 sqincp z0.d, p0.d
+# CHECK-NEXT: 3 7 1.00 sqincp z0.h, p0.h
+# CHECK-NEXT: 3 7 1.00 sqincp z0.s, p0.s
+# CHECK-NEXT: 1 2 0.50 sqincw x0
+# CHECK-NEXT: 1 2 0.50 sqincw x0, #14
+# CHECK-NEXT: 1 2 0.50 sqincw x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqincw x0, pow2
+# CHECK-NEXT: 1 2 0.50 sqincw x0, vl1
+# CHECK-NEXT: 1 2 0.50 sqincw x0, w0
+# CHECK-NEXT: 1 2 0.50 sqincw x0, w0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqincw x0, w0, pow2
+# CHECK-NEXT: 1 2 0.50 sqincw x0, w0, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 sqincw z0.s
+# CHECK-NEXT: 1 2 0.50 sqincw z0.s, all, mul #16
+# CHECK-NEXT: 1 2 0.50 sqincw z0.s, pow2
+# CHECK-NEXT: 1 2 0.50 sqincw z0.s, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 sqneg z31.b, p7/m, z31.b
+# CHECK-NEXT: 1 2 0.50 sqneg z31.d, p7/m, z31.d
+# CHECK-NEXT: 1 2 0.50 sqneg z31.h, p7/m, z31.h
+# CHECK-NEXT: 1 2 0.50 sqneg z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 4 1.00 sqrdcmlah z0.b, z1.b, z2.b, #0
+# CHECK-NEXT: 1 5 2.00 sqrdcmlah z0.d, z1.d, z2.d, #0
+# CHECK-NEXT: 1 4 1.00 sqrdcmlah z0.h, z1.h, z2.h, #0
+# CHECK-NEXT: 1 4 1.00 sqrdcmlah z0.h, z1.h, z2.h[0], #0
+# CHECK-NEXT: 1 4 1.00 sqrdcmlah z0.s, z1.s, z2.s, #0
+# CHECK-NEXT: 1 4 1.00 sqrdcmlah z0.s, z1.s, z2.s[0], #0
+# CHECK-NEXT: 1 4 1.00 sqrdcmlah z15.b, z16.b, z17.b, #270
+# CHECK-NEXT: 1 5 2.00 sqrdcmlah z15.d, z16.d, z17.d, #270
+# CHECK-NEXT: 1 4 1.00 sqrdcmlah z15.h, z16.h, z17.h, #270
+# CHECK-NEXT: 1 4 1.00 sqrdcmlah z15.s, z16.s, z17.s, #270
+# CHECK-NEXT: 1 4 1.00 sqrdcmlah z29.b, z30.b, z31.b, #90
+# CHECK-NEXT: 1 5 2.00 sqrdcmlah z29.d, z30.d, z31.d, #90
+# CHECK-NEXT: 1 4 1.00 sqrdcmlah z29.h, z30.h, z31.h, #90
+# CHECK-NEXT: 1 4 1.00 sqrdcmlah z29.s, z30.s, z31.s, #90
+# CHECK-NEXT: 1 4 1.00 sqrdcmlah z31.b, z31.b, z31.b, #180
+# CHECK-NEXT: 1 5 2.00 sqrdcmlah z31.d, z31.d, z31.d, #180
+# CHECK-NEXT: 1 4 1.00 sqrdcmlah z31.h, z30.h, z7.h[0], #180
+# CHECK-NEXT: 1 4 1.00 sqrdcmlah z31.h, z31.h, z31.h, #180
+# CHECK-NEXT: 1 4 1.00 sqrdcmlah z31.s, z30.s, z7.s[0], #180
+# CHECK-NEXT: 1 4 1.00 sqrdcmlah z31.s, z31.s, z31.s, #180
+# CHECK-NEXT: 1 4 1.00 sqrdmlah z0.b, z1.b, z31.b
+# CHECK-NEXT: 1 5 2.00 sqrdmlah z0.d, z1.d, z15.d[1]
+# CHECK-NEXT: 1 5 2.00 sqrdmlah z0.d, z1.d, z31.d
+# CHECK-NEXT: 1 4 1.00 sqrdmlah z0.h, z1.h, z31.h
+# CHECK-NEXT: 1 4 1.00 sqrdmlah z0.h, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 1.00 sqrdmlah z0.s, z1.s, z31.s
+# CHECK-NEXT: 1 4 1.00 sqrdmlah z0.s, z1.s, z7.s[3]
+# CHECK-NEXT: 1 4 1.00 sqrdmlsh z0.b, z1.b, z31.b
+# CHECK-NEXT: 1 5 2.00 sqrdmlsh z0.d, z1.d, z15.d[1]
+# CHECK-NEXT: 1 5 2.00 sqrdmlsh z0.d, z1.d, z31.d
+# CHECK-NEXT: 1 4 1.00 sqrdmlsh z0.h, z1.h, z31.h
+# CHECK-NEXT: 1 4 1.00 sqrdmlsh z0.h, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 1.00 sqrdmlsh z0.s, z1.s, z31.s
+# CHECK-NEXT: 1 4 1.00 sqrdmlsh z0.s, z1.s, z7.s[3]
+# CHECK-NEXT: 1 4 1.00 sqrdmulh z0.b, z1.b, z2.b
+# CHECK-NEXT: 2 5 2.00 sqrdmulh z0.d, z1.d, z15.d[1]
+# CHECK-NEXT: 1 4 1.00 sqrdmulh z0.h, z1.h, z2.h
+# CHECK-NEXT: 1 4 1.00 sqrdmulh z0.h, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 1.00 sqrdmulh z0.s, z1.s, z7.s[3]
+# CHECK-NEXT: 1 4 1.00 sqrdmulh z29.s, z30.s, z31.s
+# CHECK-NEXT: 2 5 2.00 sqrdmulh z31.d, z31.d, z31.d
+# CHECK-NEXT: 1 4 0.50 sqrshl z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 4 0.50 sqrshl z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 4 0.50 sqrshl z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 4 0.50 sqrshl z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 4 0.50 sqrshlr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 4 0.50 sqrshlr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 4 0.50 sqrshlr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 4 0.50 sqrshlr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 4 0.50 sqrshrnb z0.b, z0.h, #1
+# CHECK-NEXT: 1 4 0.50 sqrshrnb z0.h, z0.s, #1
+# CHECK-NEXT: 1 4 0.50 sqrshrnb z0.s, z0.d, #1
+# CHECK-NEXT: 1 4 0.50 sqrshrnb z31.b, z31.h, #8
+# CHECK-NEXT: 1 4 0.50 sqrshrnb z31.h, z31.s, #16
+# CHECK-NEXT: 1 4 0.50 sqrshrnb z31.s, z31.d, #32
+# CHECK-NEXT: 1 4 0.50 sqrshrnt z0.b, z0.h, #1
+# CHECK-NEXT: 1 4 0.50 sqrshrnt z0.h, z0.s, #1
+# CHECK-NEXT: 1 4 0.50 sqrshrnt z0.s, z0.d, #1
+# CHECK-NEXT: 1 4 0.50 sqrshrnt z31.b, z31.h, #8
+# CHECK-NEXT: 1 4 0.50 sqrshrnt z31.h, z31.s, #16
+# CHECK-NEXT: 1 4 0.50 sqrshrnt z31.s, z31.d, #32
+# CHECK-NEXT: 1 4 0.50 sqrshrunb z0.b, z0.h, #1
+# CHECK-NEXT: 1 4 0.50 sqrshrunb z0.h, z0.s, #1
+# CHECK-NEXT: 1 4 0.50 sqrshrunb z0.s, z0.d, #1
+# CHECK-NEXT: 1 4 0.50 sqrshrunb z31.b, z31.h, #8
+# CHECK-NEXT: 1 4 0.50 sqrshrunb z31.h, z31.s, #16
+# CHECK-NEXT: 1 4 0.50 sqrshrunb z31.s, z31.d, #32
+# CHECK-NEXT: 1 4 0.50 sqrshrunt z0.b, z0.h, #1
+# CHECK-NEXT: 1 4 0.50 sqrshrunt z0.h, z0.s, #1
+# CHECK-NEXT: 1 4 0.50 sqrshrunt z0.s, z0.d, #1
+# CHECK-NEXT: 1 4 0.50 sqrshrunt z31.b, z31.h, #8
+# CHECK-NEXT: 1 4 0.50 sqrshrunt z31.h, z31.s, #16
+# CHECK-NEXT: 1 4 0.50 sqrshrunt z31.s, z31.d, #32
+# CHECK-NEXT: 1 4 0.50 sqshl z0.b, p0/m, z0.b, #0
+# CHECK-NEXT: 1 4 0.50 sqshl z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 4 0.50 sqshl z0.d, p0/m, z0.d, #0
+# CHECK-NEXT: 1 4 0.50 sqshl z0.h, p0/m, z0.h, #0
+# CHECK-NEXT: 1 4 0.50 sqshl z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 4 0.50 sqshl z0.s, p0/m, z0.s, #0
+# CHECK-NEXT: 1 4 0.50 sqshl z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 4 0.50 sqshl z31.b, p0/m, z31.b, #7
+# CHECK-NEXT: 1 4 0.50 sqshl z31.d, p0/m, z31.d, #63
+# CHECK-NEXT: 1 4 0.50 sqshl z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 4 0.50 sqshl z31.h, p0/m, z31.h, #15
+# CHECK-NEXT: 1 4 0.50 sqshl z31.s, p0/m, z31.s, #31
+# CHECK-NEXT: 1 4 0.50 sqshlr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 4 0.50 sqshlr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 4 0.50 sqshlr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 4 0.50 sqshlr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 4 0.50 sqshlu z0.b, p0/m, z0.b, #0
+# CHECK-NEXT: 1 4 0.50 sqshlu z0.d, p0/m, z0.d, #0
+# CHECK-NEXT: 1 4 0.50 sqshlu z0.h, p0/m, z0.h, #0
+# CHECK-NEXT: 1 4 0.50 sqshlu z0.s, p0/m, z0.s, #0
+# CHECK-NEXT: 1 4 0.50 sqshlu z31.b, p0/m, z31.b, #7
+# CHECK-NEXT: 1 4 0.50 sqshlu z31.d, p0/m, z31.d, #63
+# CHECK-NEXT: 1 4 0.50 sqshlu z31.h, p0/m, z31.h, #15
+# CHECK-NEXT: 1 4 0.50 sqshlu z31.s, p0/m, z31.s, #31
+# CHECK-NEXT: 1 4 0.50 sqshrnb z0.b, z0.h, #1
+# CHECK-NEXT: 1 4 0.50 sqshrnb z0.h, z0.s, #1
+# CHECK-NEXT: 1 4 0.50 sqshrnb z0.s, z0.d, #1
+# CHECK-NEXT: 1 4 0.50 sqshrnb z31.b, z31.h, #8
+# CHECK-NEXT: 1 4 0.50 sqshrnb z31.h, z31.s, #16
+# CHECK-NEXT: 1 4 0.50 sqshrnb z31.s, z31.d, #32
+# CHECK-NEXT: 1 4 0.50 sqshrnt z0.b, z0.h, #1
+# CHECK-NEXT: 1 4 0.50 sqshrnt z0.h, z0.s, #1
+# CHECK-NEXT: 1 4 0.50 sqshrnt z0.s, z0.d, #1
+# CHECK-NEXT: 1 4 0.50 sqshrnt z31.b, z31.h, #8
+# CHECK-NEXT: 1 4 0.50 sqshrnt z31.h, z31.s, #16
+# CHECK-NEXT: 1 4 0.50 sqshrnt z31.s, z31.d, #32
+# CHECK-NEXT: 1 4 0.50 sqshrunb z0.b, z0.h, #1
+# CHECK-NEXT: 1 4 0.50 sqshrunb z0.h, z0.s, #1
+# CHECK-NEXT: 1 4 0.50 sqshrunb z0.s, z0.d, #1
+# CHECK-NEXT: 1 4 0.50 sqshrunb z31.b, z31.h, #8
+# CHECK-NEXT: 1 4 0.50 sqshrunb z31.h, z31.s, #16
+# CHECK-NEXT: 1 4 0.50 sqshrunb z31.s, z31.d, #32
+# CHECK-NEXT: 1 4 0.50 sqshrunt z0.b, z0.h, #1
+# CHECK-NEXT: 1 4 0.50 sqshrunt z0.h, z0.s, #1
+# CHECK-NEXT: 1 4 0.50 sqshrunt z0.s, z0.d, #1
+# CHECK-NEXT: 1 4 0.50 sqshrunt z31.b, z31.h, #8
+# CHECK-NEXT: 1 4 0.50 sqshrunt z31.h, z31.s, #16
+# CHECK-NEXT: 1 4 0.50 sqshrunt z31.s, z31.d, #32
+# CHECK-NEXT: 1 2 0.50 sqsub z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 2 0.50 sqsub z0.b, z0.b, #0
+# CHECK-NEXT: 1 2 0.50 sqsub z0.b, z0.b, z0.b
+# CHECK-NEXT: 1 2 0.50 sqsub z0.d, z0.d, #0
+# CHECK-NEXT: 1 2 0.50 sqsub z0.d, z0.d, #0, lsl #8
+# CHECK-NEXT: 1 2 0.50 sqsub z0.d, z0.d, z0.d
+# CHECK-NEXT: 1 2 0.50 sqsub z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.50 sqsub z0.h, z0.h, #0
+# CHECK-NEXT: 1 2 0.50 sqsub z0.h, z0.h, #0, lsl #8
+# CHECK-NEXT: 1 2 0.50 sqsub z0.h, z0.h, z0.h
+# CHECK-NEXT: 1 2 0.50 sqsub z0.s, z0.s, #0
+# CHECK-NEXT: 1 2 0.50 sqsub z0.s, z0.s, #0, lsl #8
+# CHECK-NEXT: 1 2 0.50 sqsub z0.s, z0.s, z0.s
+# CHECK-NEXT: 1 2 0.50 sqsub z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.50 sqsub z31.b, z31.b, #255
+# CHECK-NEXT: 1 2 0.50 sqsub z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 2 0.50 sqsub z31.d, z31.d, #65280
+# CHECK-NEXT: 1 2 0.50 sqsub z31.h, z31.h, #65280
+# CHECK-NEXT: 1 2 0.50 sqsub z31.s, z31.s, #65280
+# CHECK-NEXT: 1 2 0.50 sqsubr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 2 0.50 sqsubr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.50 sqsubr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.50 sqsubr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 4 0.50 sqxtnb z0.b, z31.h
+# CHECK-NEXT: 1 4 0.50 sqxtnb z0.h, z31.s
+# CHECK-NEXT: 1 4 0.50 sqxtnb z0.s, z31.d
+# CHECK-NEXT: 1 4 0.50 sqxtnt z0.b, z31.h
+# CHECK-NEXT: 1 4 0.50 sqxtnt z0.h, z31.s
+# CHECK-NEXT: 1 4 0.50 sqxtnt z0.s, z31.d
+# CHECK-NEXT: 1 4 0.50 sqxtunb z0.b, z31.h
+# CHECK-NEXT: 1 4 0.50 sqxtunb z0.h, z31.s
+# CHECK-NEXT: 1 4 0.50 sqxtunb z0.s, z31.d
+# CHECK-NEXT: 1 4 0.50 sqxtunt z0.b, z31.h
+# CHECK-NEXT: 1 4 0.50 sqxtunt z0.h, z31.s
+# CHECK-NEXT: 1 4 0.50 sqxtunt z0.s, z31.d
+# CHECK-NEXT: 1 2 0.50 srhadd z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 2 0.50 srhadd z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.50 srhadd z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.50 srhadd z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 2 0.50 sri z0.b, z0.b, #1
+# CHECK-NEXT: 1 2 0.50 sri z0.d, z0.d, #1
+# CHECK-NEXT: 1 2 0.50 sri z0.h, z0.h, #1
+# CHECK-NEXT: 1 2 0.50 sri z0.s, z0.s, #1
+# CHECK-NEXT: 1 2 0.50 sri z31.b, z31.b, #8
+# CHECK-NEXT: 1 2 0.50 sri z31.d, z31.d, #64
+# CHECK-NEXT: 1 2 0.50 sri z31.h, z31.h, #16
+# CHECK-NEXT: 1 2 0.50 sri z31.s, z31.s, #32
+# CHECK-NEXT: 1 4 0.50 srshl z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 4 0.50 srshl z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 4 0.50 srshl z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 4 0.50 srshl z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 4 0.50 srshlr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 4 0.50 srshlr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 4 0.50 srshlr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 4 0.50 srshlr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 4 0.50 srshr z0.b, p0/m, z0.b, #1
+# CHECK-NEXT: 1 4 0.50 srshr z0.d, p0/m, z0.d, #1
+# CHECK-NEXT: 1 4 0.50 srshr z0.h, p0/m, z0.h, #1
+# CHECK-NEXT: 1 4 0.50 srshr z0.s, p0/m, z0.s, #1
+# CHECK-NEXT: 1 4 0.50 srshr z31.b, p0/m, z31.b, #8
+# CHECK-NEXT: 1 4 0.50 srshr z31.d, p0/m, z31.d, #64
+# CHECK-NEXT: 1 4 0.50 srshr z31.h, p0/m, z31.h, #16
+# CHECK-NEXT: 1 4 0.50 srshr z31.s, p0/m, z31.s, #32
+# CHECK-NEXT: 1 4 1.00 srsra z0.b, z0.b, #1
+# CHECK-NEXT: 1 4 1.00 srsra z0.d, z0.d, #1
+# CHECK-NEXT: 1 4 1.00 srsra z0.h, z0.h, #1
+# CHECK-NEXT: 1 4 1.00 srsra z0.s, z0.s, #1
+# CHECK-NEXT: 1 4 1.00 srsra z31.b, z31.b, #8
+# CHECK-NEXT: 1 4 1.00 srsra z31.d, z31.d, #64
+# CHECK-NEXT: 1 4 1.00 srsra z31.h, z31.h, #16
+# CHECK-NEXT: 1 4 1.00 srsra z31.s, z31.s, #32
+# CHECK-NEXT: 1 2 0.50 sshllb z0.d, z0.s, #0
+# CHECK-NEXT: 1 2 0.50 sshllb z0.h, z0.b, #0
+# CHECK-NEXT: 1 2 0.50 sshllb z0.s, z0.h, #0
+# CHECK-NEXT: 1 2 0.50 sshllb z31.d, z31.s, #31
+# CHECK-NEXT: 1 2 0.50 sshllb z31.h, z31.b, #7
+# CHECK-NEXT: 1 2 0.50 sshllb z31.s, z31.h, #15
+# CHECK-NEXT: 1 2 0.50 sshllt z0.d, z0.s, #0
+# CHECK-NEXT: 1 2 0.50 sshllt z0.h, z0.b, #0
+# CHECK-NEXT: 1 2 0.50 sshllt z0.s, z0.h, #0
+# CHECK-NEXT: 1 2 0.50 sshllt z31.d, z31.s, #31
+# CHECK-NEXT: 1 2 0.50 sshllt z31.h, z31.b, #7
+# CHECK-NEXT: 1 2 0.50 sshllt z31.s, z31.h, #15
+# CHECK-NEXT: 1 4 1.00 ssra z0.b, z0.b, #1
+# CHECK-NEXT: 1 4 1.00 ssra z0.d, z0.d, #1
+# CHECK-NEXT: 1 4 1.00 ssra z0.h, z0.h, #1
+# CHECK-NEXT: 1 4 1.00 ssra z0.s, z0.s, #1
+# CHECK-NEXT: 1 4 1.00 ssra z31.b, z31.b, #8
+# CHECK-NEXT: 1 4 1.00 ssra z31.d, z31.d, #64
+# CHECK-NEXT: 1 4 1.00 ssra z31.h, z31.h, #16
+# CHECK-NEXT: 1 4 1.00 ssra z31.s, z31.s, #32
+# CHECK-NEXT: 1 2 0.50 ssublb z0.h, z1.b, z2.b
+# CHECK-NEXT: 1 2 0.50 ssublb z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 2 0.50 ssublb z31.d, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.50 ssublbt z0.d, z1.s, z31.s
+# CHECK-NEXT: 1 2 0.50 ssublbt z0.h, z1.b, z31.b
+# CHECK-NEXT: 1 2 0.50 ssublbt z0.s, z1.h, z31.h
+# CHECK-NEXT: 1 2 0.50 ssublt z0.h, z1.b, z2.b
+# CHECK-NEXT: 1 2 0.50 ssublt z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 2 0.50 ssublt z31.d, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.50 ssubltb z0.d, z1.s, z31.s
+# CHECK-NEXT: 1 2 0.50 ssubltb z0.h, z1.b, z31.b
+# CHECK-NEXT: 1 2 0.50 ssubltb z0.s, z1.h, z31.h
+# CHECK-NEXT: 1 2 0.50 ssubwb z0.h, z1.h, z2.b
+# CHECK-NEXT: 1 2 0.50 ssubwb z29.s, z30.s, z31.h
+# CHECK-NEXT: 1 2 0.50 ssubwb z31.d, z31.d, z31.s
+# CHECK-NEXT: 1 2 0.50 ssubwt z0.h, z1.h, z2.b
+# CHECK-NEXT: 1 2 0.50 ssubwt z29.s, z30.s, z31.h
+# CHECK-NEXT: 1 2 0.50 ssubwt z31.d, z31.d, z31.s
+# CHECK-NEXT: 2 2 0.50 * st1b { z0.b }, p0, [x0, x0]
+# CHECK-NEXT: 2 2 0.50 * st1b { z0.b }, p0, [x0]
+# CHECK-NEXT: 2 2 0.50 * st1b { z0.d }, p0, [x0, x0]
+# CHECK-NEXT: 4 2 1.00 * st1b { z0.d }, p0, [x0, z0.d, sxtw]
+# CHECK-NEXT: 4 2 1.00 * st1b { z0.d }, p0, [x0, z0.d, uxtw]
+# CHECK-NEXT: 4 2 1.00 * st1b { z0.d }, p0, [x0, z0.d]
+# CHECK-NEXT: 2 2 0.50 * st1b { z0.d }, p0, [x0]
+# CHECK-NEXT: 4 2 1.00 * st1b { z0.d }, p7, [z0.d]
+# CHECK-NEXT: 2 2 0.50 * st1b { z0.h }, p0, [x0, x0]
+# CHECK-NEXT: 2 2 0.50 * st1b { z0.h }, p0, [x0]
+# CHECK-NEXT: 2 2 0.50 * st1b { z0.s }, p0, [x0, x0]
+# CHECK-NEXT: 8 4 2.00 * st1b { z0.s }, p0, [x0, z0.s, sxtw]
+# CHECK-NEXT: 8 4 2.00 * st1b { z0.s }, p0, [x0, z0.s, uxtw]
+# CHECK-NEXT: 2 2 0.50 * st1b { z0.s }, p0, [x0]
+# CHECK-NEXT: 8 4 2.00 * st1b { z0.s }, p7, [z0.s]
+# CHECK-NEXT: 2 2 0.50 * st1b { z21.b }, p5, [x10, #5, mul vl]
+# CHECK-NEXT: 2 2 0.50 * st1b { z21.d }, p5, [x10, #5, mul vl]
+# CHECK-NEXT: 2 2 0.50 * st1b { z21.h }, p5, [x10, #5, mul vl]
+# CHECK-NEXT: 2 2 0.50 * st1b { z21.s }, p5, [x10, #5, mul vl]
+# CHECK-NEXT: 2 2 0.50 * st1b { z31.b }, p7, [sp, #-1, mul vl]
+# CHECK-NEXT: 2 2 0.50 * st1b { z31.d }, p7, [sp, #-1, mul vl]
+# CHECK-NEXT: 4 2 1.00 * st1b { z31.d }, p7, [z31.d, #31]
+# CHECK-NEXT: 2 2 0.50 * st1b { z31.h }, p7, [sp, #-1, mul vl]
+# CHECK-NEXT: 2 2 0.50 * st1b { z31.s }, p7, [sp, #-1, mul vl]
+# CHECK-NEXT: 8 4 2.00 * st1b { z31.s }, p7, [z31.s, #31]
+# CHECK-NEXT: 2 2 0.50 * st1d { z0.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT: 4 2 1.00 * st1d { z0.d }, p0, [x0, z0.d, lsl #3]
+# CHECK-NEXT: 4 2 1.00 * st1d { z0.d }, p0, [x0, z0.d, sxtw #3]
+# CHECK-NEXT: 4 2 1.00 * st1d { z0.d }, p0, [x0, z0.d, sxtw]
+# CHECK-NEXT: 4 2 1.00 * st1d { z0.d }, p0, [x0, z0.d, uxtw #3]
+# CHECK-NEXT: 4 2 1.00 * st1d { z0.d }, p0, [x0, z0.d, uxtw]
+# CHECK-NEXT: 4 2 1.00 * st1d { z0.d }, p0, [x0, z0.d]
+# CHECK-NEXT: 2 2 0.50 * st1d { z0.d }, p0, [x0]
+# CHECK-NEXT: 4 2 1.00 * st1d { z0.d }, p7, [z0.d]
+# CHECK-NEXT: 2 2 0.50 * st1d { z21.d }, p5, [x10, #5, mul vl]
+# CHECK-NEXT: 2 2 0.50 * st1d { z31.d }, p7, [sp, #-1, mul vl]
+# CHECK-NEXT: 4 2 1.00 * st1d { z31.d }, p7, [z31.d, #248]
+# CHECK-NEXT: 3 2 0.50 * st1h { z0.d }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: 4 2 1.00 * st1h { z0.d }, p0, [x0, z0.d, lsl #1]
+# CHECK-NEXT: 4 2 1.00 * st1h { z0.d }, p0, [x0, z0.d, sxtw #1]
+# CHECK-NEXT: 4 2 1.00 * st1h { z0.d }, p0, [x0, z0.d, sxtw]
+# CHECK-NEXT: 4 2 1.00 * st1h { z0.d }, p0, [x0, z0.d, uxtw #1]
+# CHECK-NEXT: 4 2 1.00 * st1h { z0.d }, p0, [x0, z0.d, uxtw]
+# CHECK-NEXT: 4 2 1.00 * st1h { z0.d }, p0, [x0, z0.d]
+# CHECK-NEXT: 2 2 0.50 * st1h { z0.d }, p0, [x0]
+# CHECK-NEXT: 4 2 1.00 * st1h { z0.d }, p7, [z0.d]
+# CHECK-NEXT: 3 2 0.50 * st1h { z0.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: 2 2 0.50 * st1h { z0.h }, p0, [x0]
+# CHECK-NEXT: 3 2 0.50 * st1h { z0.s }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: 8 4 2.00 * st1h { z0.s }, p0, [x0, z0.s, sxtw #1]
+# CHECK-NEXT: 8 4 2.00 * st1h { z0.s }, p0, [x0, z0.s, sxtw]
+# CHECK-NEXT: 8 4 2.00 * st1h { z0.s }, p0, [x0, z0.s, uxtw #1]
+# CHECK-NEXT: 8 4 2.00 * st1h { z0.s }, p0, [x0, z0.s, uxtw]
+# CHECK-NEXT: 2 2 0.50 * st1h { z0.s }, p0, [x0]
+# CHECK-NEXT: 8 4 2.00 * st1h { z0.s }, p7, [z0.s]
+# CHECK-NEXT: 2 2 0.50 * st1h { z21.d }, p5, [x10, #5, mul vl]
+# CHECK-NEXT: 2 2 0.50 * st1h { z21.h }, p5, [x10, #5, mul vl]
+# CHECK-NEXT: 2 2 0.50 * st1h { z21.s }, p5, [x10, #5, mul vl]
+# CHECK-NEXT: 2 2 0.50 * st1h { z31.d }, p7, [sp, #-1, mul vl]
+# CHECK-NEXT: 4 2 1.00 * st1h { z31.d }, p7, [z31.d, #62]
+# CHECK-NEXT: 2 2 0.50 * st1h { z31.h }, p7, [sp, #-1, mul vl]
+# CHECK-NEXT: 2 2 0.50 * st1h { z31.s }, p7, [sp, #-1, mul vl]
+# CHECK-NEXT: 8 4 2.00 * st1h { z31.s }, p7, [z31.s, #62]
+# CHECK-NEXT: 2 2 0.50 * st1w { z0.d }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: 4 2 1.00 * st1w { z0.d }, p0, [x0, z0.d, lsl #2]
+# CHECK-NEXT: 4 2 1.00 * st1w { z0.d }, p0, [x0, z0.d, sxtw #2]
+# CHECK-NEXT: 4 2 1.00 * st1w { z0.d }, p0, [x0, z0.d, sxtw]
+# CHECK-NEXT: 4 2 1.00 * st1w { z0.d }, p0, [x0, z0.d, uxtw #2]
+# CHECK-NEXT: 4 2 1.00 * st1w { z0.d }, p0, [x0, z0.d, uxtw]
+# CHECK-NEXT: 4 2 1.00 * st1w { z0.d }, p0, [x0, z0.d]
+# CHECK-NEXT: 2 2 0.50 * st1w { z0.d }, p0, [x0]
+# CHECK-NEXT: 4 2 1.00 * st1w { z0.d }, p7, [z0.d]
+# CHECK-NEXT: 2 2 0.50 * st1w { z0.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: 8 4 2.00 * st1w { z0.s }, p0, [x0, z0.s, sxtw #2]
+# CHECK-NEXT: 8 4 2.00 * st1w { z0.s }, p0, [x0, z0.s, sxtw]
+# CHECK-NEXT: 8 4 2.00 * st1w { z0.s }, p0, [x0, z0.s, uxtw #2]
+# CHECK-NEXT: 8 4 2.00 * st1w { z0.s }, p0, [x0, z0.s, uxtw]
+# CHECK-NEXT: 2 2 0.50 * st1w { z0.s }, p0, [x0]
+# CHECK-NEXT: 8 4 2.00 * st1w { z0.s }, p7, [z0.s]
+# CHECK-NEXT: 2 2 0.50 * st1w { z21.d }, p5, [x10, #5, mul vl]
+# CHECK-NEXT: 2 2 0.50 * st1w { z21.s }, p5, [x10, #5, mul vl]
+# CHECK-NEXT: 2 2 0.50 * st1w { z31.d }, p7, [sp, #-1, mul vl]
+# CHECK-NEXT: 4 2 1.00 * st1w { z31.d }, p7, [z31.d, #124]
+# CHECK-NEXT: 2 2 0.50 * st1w { z31.s }, p7, [sp, #-1, mul vl]
+# CHECK-NEXT: 8 4 2.00 * st1w { z31.s }, p7, [z31.s, #124]
+# CHECK-NEXT: 4 4 1.00 * st2b { z0.b, z1.b }, p0, [x0, x0]
+# CHECK-NEXT: 2 4 0.50 * st2b { z0.b, z1.b }, p0, [x0]
+# CHECK-NEXT: 2 4 0.50 * st2b { z21.b, z22.b }, p5, [x10, #10, mul vl]
+# CHECK-NEXT: 2 4 0.50 * st2b { z23.b, z24.b }, p3, [x13, #-16, mul vl]
+# CHECK-NEXT: 4 4 1.00 * st2b { z5.b, z6.b }, p3, [x17, x16]
+# CHECK-NEXT: 4 4 1.00 * st2d { z0.d, z1.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT: 2 4 0.50 * st2d { z0.d, z1.d }, p0, [x0]
+# CHECK-NEXT: 2 4 0.50 * st2d { z21.d, z22.d }, p5, [x10, #10, mul vl]
+# CHECK-NEXT: 2 4 0.50 * st2d { z23.d, z24.d }, p3, [x13, #-16, mul vl]
+# CHECK-NEXT: 4 4 1.00 * st2d { z5.d, z6.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT: 6 4 1.00 * st2h { z0.h, z1.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: 2 4 0.50 * st2h { z0.h, z1.h }, p0, [x0]
+# CHECK-NEXT: 2 4 0.50 * st2h { z21.h, z22.h }, p5, [x10, #10, mul vl]
+# CHECK-NEXT: 2 4 0.50 * st2h { z23.h, z24.h }, p3, [x13, #-16, mul vl]
+# CHECK-NEXT: 6 4 1.00 * st2h { z5.h, z6.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT: 4 4 1.00 * st2w { z0.s, z1.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: 2 4 0.50 * st2w { z0.s, z1.s }, p0, [x0]
+# CHECK-NEXT: 2 4 0.50 * st2w { z21.s, z22.s }, p5, [x10, #10, mul vl]
+# CHECK-NEXT: 2 4 0.50 * st2w { z23.s, z24.s }, p3, [x13, #-16, mul vl]
+# CHECK-NEXT: 4 4 1.00 * st2w { z5.s, z6.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT: 27 7 4.50 * st3b { z0.b - z2.b }, p0, [x0, x0]
+# CHECK-NEXT: 18 7 4.50 * st3b { z0.b - z2.b }, p0, [x0]
+# CHECK-NEXT: 18 7 4.50 * st3b { z21.b - z23.b }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: 18 7 4.50 * st3b { z23.b - z25.b }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: 27 7 4.50 * st3b { z5.b - z7.b }, p3, [x17, x16]
+# CHECK-NEXT: 27 7 4.50 * st3d { z0.d - z2.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT: 18 7 4.50 * st3d { z0.d - z2.d }, p0, [x0]
+# CHECK-NEXT: 18 7 4.50 * st3d { z21.d - z23.d }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: 18 7 4.50 * st3d { z23.d - z25.d }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: 27 7 4.50 * st3d { z5.d - z7.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT: 27 7 4.50 * st3h { z0.h - z2.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: 18 7 4.50 * st3h { z0.h - z2.h }, p0, [x0]
+# CHECK-NEXT: 18 7 4.50 * st3h { z21.h - z23.h }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: 18 7 4.50 * st3h { z23.h - z25.h }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: 27 7 4.50 * st3h { z5.h - z7.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT: 27 7 4.50 * st3w { z0.s - z2.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: 18 7 4.50 * st3w { z0.s - z2.s }, p0, [x0]
+# CHECK-NEXT: 18 7 4.50 * st3w { z21.s - z23.s }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: 18 7 4.50 * st3w { z23.s - z25.s }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: 27 7 4.50 * st3w { z5.s - z7.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT: 54 11 9.00 * st4b { z0.b - z3.b }, p0, [x0, x0]
+# CHECK-NEXT: 36 11 9.00 * st4b { z0.b - z3.b }, p0, [x0]
+# CHECK-NEXT: 36 11 9.00 * st4b { z21.b - z24.b }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: 36 11 9.00 * st4b { z23.b - z26.b }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: 54 11 9.00 * st4b { z5.b - z8.b }, p3, [x17, x16]
+# CHECK-NEXT: 54 11 9.00 * st4d { z0.d - z3.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT: 36 11 9.00 * st4d { z0.d - z3.d }, p0, [x0]
+# CHECK-NEXT: 36 11 9.00 * st4d { z21.d - z24.d }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: 36 11 9.00 * st4d { z23.d - z26.d }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: 54 11 9.00 * st4d { z5.d - z8.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT: 54 11 9.00 * st4h { z0.h - z3.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: 36 11 9.00 * st4h { z0.h - z3.h }, p0, [x0]
+# CHECK-NEXT: 36 11 9.00 * st4h { z21.h - z24.h }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: 36 11 9.00 * st4h { z23.h - z26.h }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: 54 11 9.00 * st4h { z5.h - z8.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT: 54 11 9.00 * st4w { z0.s - z3.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: 36 11 9.00 * st4w { z0.s - z3.s }, p0, [x0]
+# CHECK-NEXT: 36 11 9.00 * st4w { z21.s - z24.s }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: 36 11 9.00 * st4w { z23.s - z26.s }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: 54 11 9.00 * st4w { z5.s - z8.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT: 2 2 0.50 * stnt1b { z0.b }, p0, [x0, x0]
+# CHECK-NEXT: 2 2 0.50 * stnt1b { z0.b }, p0, [x0]
+# CHECK-NEXT: 4 2 1.00 * stnt1b { z0.d }, p0, [z1.d]
+# CHECK-NEXT: 8 4 2.00 * stnt1b { z0.s }, p0, [z1.s]
+# CHECK-NEXT: 2 2 0.50 * stnt1b { z21.b }, p5, [x10, #7, mul vl]
+# CHECK-NEXT: 2 2 0.50 * stnt1b { z23.b }, p3, [x13, #-8, mul vl]
+# CHECK-NEXT: 4 2 1.00 * stnt1b { z31.d }, p7, [z31.d, x0]
+# CHECK-NEXT: 4 2 1.00 * stnt1b { z31.d }, p7, [z31.d]
+# CHECK-NEXT: 8 4 2.00 * stnt1b { z31.s }, p7, [z31.s, x0]
+# CHECK-NEXT: 8 4 2.00 * stnt1b { z31.s }, p7, [z31.s]
+# CHECK-NEXT: 2 2 0.50 * stnt1d { z0.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT: 2 2 0.50 * stnt1d { z0.d }, p0, [x0]
+# CHECK-NEXT: 4 2 1.00 * stnt1d { z0.d }, p0, [z1.d]
+# CHECK-NEXT: 2 2 0.50 * stnt1d { z21.d }, p5, [x10, #7, mul vl]
+# CHECK-NEXT: 2 2 0.50 * stnt1d { z23.d }, p3, [x13, #-8, mul vl]
+# CHECK-NEXT: 4 2 1.00 * stnt1d { z31.d }, p7, [z31.d, x0]
+# CHECK-NEXT: 4 2 1.00 * stnt1d { z31.d }, p7, [z31.d]
+# CHECK-NEXT: 4 2 1.00 * stnt1h { z0.d }, p0, [z1.d]
+# CHECK-NEXT: 3 2 0.50 * stnt1h { z0.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: 2 2 0.50 * stnt1h { z0.h }, p0, [x0]
+# CHECK-NEXT: 8 4 2.00 * stnt1h { z0.s }, p0, [z1.s]
+# CHECK-NEXT: 2 2 0.50 * stnt1h { z21.h }, p5, [x10, #7, mul vl]
+# CHECK-NEXT: 2 2 0.50 * stnt1h { z23.h }, p3, [x13, #-8, mul vl]
+# CHECK-NEXT: 4 2 1.00 * stnt1h { z31.d }, p7, [z31.d, x0]
+# CHECK-NEXT: 4 2 1.00 * stnt1h { z31.d }, p7, [z31.d]
+# CHECK-NEXT: 8 4 2.00 * stnt1h { z31.s }, p7, [z31.s, x0]
+# CHECK-NEXT: 8 4 2.00 * stnt1h { z31.s }, p7, [z31.s]
+# CHECK-NEXT: 4 2 1.00 * stnt1w { z0.d }, p0, [z1.d]
+# CHECK-NEXT: 2 2 0.50 * stnt1w { z0.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: 2 2 0.50 * stnt1w { z0.s }, p0, [x0]
+# CHECK-NEXT: 8 4 2.00 * stnt1w { z0.s }, p0, [z1.s]
+# CHECK-NEXT: 2 2 0.50 * stnt1w { z21.s }, p5, [x10, #7, mul vl]
+# CHECK-NEXT: 2 2 0.50 * stnt1w { z23.s }, p3, [x13, #-8, mul vl]
+# CHECK-NEXT: 4 2 1.00 * stnt1w { z31.d }, p7, [z31.d, x0]
+# CHECK-NEXT: 4 2 1.00 * stnt1w { z31.d }, p7, [z31.d]
+# CHECK-NEXT: 8 4 2.00 * stnt1w { z31.s }, p7, [z31.s, x0]
+# CHECK-NEXT: 8 4 2.00 * stnt1w { z31.s }, p7, [z31.s]
+# CHECK-NEXT: 1 1 0.50 * str p0, [x0]
+# CHECK-NEXT: 1 1 0.50 * str p15, [sp, #-256, mul vl]
+# CHECK-NEXT: 1 1 0.50 * str p5, [x10, #255, mul vl]
+# CHECK-NEXT: 2 2 0.50 * str z0, [x0]
+# CHECK-NEXT: 2 2 0.50 * str z21, [x10, #-256, mul vl]
+# CHECK-NEXT: 2 2 0.50 * str z31, [sp, #255, mul vl]
+# CHECK-NEXT: 1 2 0.50 sub z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: 1 2 0.50 sub z0.b, z0.b, #0
+# CHECK-NEXT: 1 2 0.50 sub z0.b, z0.b, z0.b
+# CHECK-NEXT: 1 2 0.50 sub z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: 1 2 0.50 sub z0.d, z0.d, #0
+# CHECK-NEXT: 1 2 0.50 sub z0.d, z0.d, #0, lsl #8
+# CHECK-NEXT: 1 2 0.50 sub z0.d, z0.d, z0.d
+# CHECK-NEXT: 1 2 0.50 sub z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: 1 2 0.50 sub z0.h, z0.h, #0
+# CHECK-NEXT: 1 2 0.50 sub z0.h, z0.h, #0, lsl #8
+# CHECK-NEXT: 1 2 0.50 sub z0.h, z0.h, z0.h
+# CHECK-NEXT: 1 2 0.50 sub z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: 1 2 0.50 sub z0.s, z0.s, #0
+# CHECK-NEXT: 1 2 0.50 sub z0.s, z0.s, #0, lsl #8
+# CHECK-NEXT: 1 2 0.50 sub z0.s, z0.s, z0.s
+# CHECK-NEXT: 1 2 0.50 sub z21.b, p5/m, z21.b, z10.b
+# CHECK-NEXT: 1 2 0.50 sub z21.b, z10.b, z21.b
+# CHECK-NEXT: 1 2 0.50 sub z21.d, p5/m, z21.d, z10.d
+# CHECK-NEXT: 1 2 0.50 sub z21.d, z10.d, z21.d
+# CHECK-NEXT: 1 2 0.50 sub z21.h, p5/m, z21.h, z10.h
+# CHECK-NEXT: 1 2 0.50 sub z21.h, z10.h, z21.h
+# CHECK-NEXT: 1 2 0.50 sub z21.s, p5/m, z21.s, z10.s
+# CHECK-NEXT: 1 2 0.50 sub z21.s, z10.s, z21.s
+# CHECK-NEXT: 1 2 0.50 sub z23.b, p3/m, z23.b, z13.b
+# CHECK-NEXT: 1 2 0.50 sub z23.b, z13.b, z8.b
+# CHECK-NEXT: 1 2 0.50 sub z23.d, p3/m, z23.d, z13.d
+# CHECK-NEXT: 1 2 0.50 sub z23.d, z13.d, z8.d
+# CHECK-NEXT: 1 2 0.50 sub z23.h, p3/m, z23.h, z13.h
+# CHECK-NEXT: 1 2 0.50 sub z23.h, z13.h, z8.h
+# CHECK-NEXT: 1 2 0.50 sub z23.s, p3/m, z23.s, z13.s
+# CHECK-NEXT: 1 2 0.50 sub z23.s, z13.s, z8.s
+# CHECK-NEXT: 1 2 0.50 sub z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: 1 2 0.50 sub z31.b, z31.b, #255
+# CHECK-NEXT: 1 2 0.50 sub z31.b, z31.b, z31.b
+# CHECK-NEXT: 1 2 0.50 sub z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: 1 2 0.50 sub z31.d, z31.d, #65280
+# CHECK-NEXT: 1 2 0.50 sub z31.d, z31.d, z31.d
+# CHECK-NEXT: 1 2 0.50 sub z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: 1 2 0.50 sub z31.h, z31.h, #65280
+# CHECK-NEXT: 1 2 0.50 sub z31.h, z31.h, z31.h
+# CHECK-NEXT: 1 2 0.50 sub z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.50 sub z31.s, z31.s, #65280
+# CHECK-NEXT: 1 2 0.50 sub z31.s, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.50 subhnb z0.b, z1.h, z31.h
+# CHECK-NEXT: 1 2 0.50 subhnb z0.h, z1.s, z31.s
+# CHECK-NEXT: 1 2 0.50 subhnb z0.s, z1.d, z31.d
+# CHECK-NEXT: 1 2 0.50 subhnt z0.b, z1.h, z31.h
+# CHECK-NEXT: 1 2 0.50 subhnt z0.h, z1.s, z31.s
+# CHECK-NEXT: 1 2 0.50 subhnt z0.s, z1.d, z31.d
+# CHECK-NEXT: 1 2 0.50 subr z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: 1 2 0.50 subr z0.b, z0.b, #0
+# CHECK-NEXT: 1 2 0.50 subr z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: 1 2 0.50 subr z0.d, z0.d, #0
+# CHECK-NEXT: 1 2 0.50 subr z0.d, z0.d, #0, lsl #8
+# CHECK-NEXT: 1 2 0.50 subr z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: 1 2 0.50 subr z0.h, z0.h, #0
+# CHECK-NEXT: 1 2 0.50 subr z0.h, z0.h, #0, lsl #8
+# CHECK-NEXT: 1 2 0.50 subr z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: 1 2 0.50 subr z0.s, z0.s, #0
+# CHECK-NEXT: 1 2 0.50 subr z0.s, z0.s, #0, lsl #8
+# CHECK-NEXT: 1 2 0.50 subr z31.b, z31.b, #255
+# CHECK-NEXT: 1 2 0.50 subr z31.d, z31.d, #65280
+# CHECK-NEXT: 1 2 0.50 subr z31.h, z31.h, #65280
+# CHECK-NEXT: 1 2 0.50 subr z31.s, z31.s, #65280
+# CHECK-NEXT: 1 2 0.50 sunpkhi z31.d, z31.s
+# CHECK-NEXT: 1 2 0.50 sunpkhi z31.h, z31.b
+# CHECK-NEXT: 1 2 0.50 sunpkhi z31.s, z31.h
+# CHECK-NEXT: 1 2 0.50 sunpklo z31.d, z31.s
+# CHECK-NEXT: 1 2 0.50 sunpklo z31.h, z31.b
+# CHECK-NEXT: 1 2 0.50 sunpklo z31.s, z31.h
+# CHECK-NEXT: 1 2 0.50 suqadd z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 2 0.50 suqadd z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.50 suqadd z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.50 suqadd z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 2 0.50 sxtb z0.d, p0/m, z0.d
+# CHECK-NEXT: 1 2 0.50 sxtb z0.h, p0/m, z0.h
+# CHECK-NEXT: 1 2 0.50 sxtb z0.s, p0/m, z0.s
+# CHECK-NEXT: 1 2 0.50 sxtb z31.d, p7/m, z31.d
+# CHECK-NEXT: 1 2 0.50 sxtb z31.h, p7/m, z31.h
+# CHECK-NEXT: 1 2 0.50 sxtb z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 2 0.50 sxth z0.d, p0/m, z0.d
+# CHECK-NEXT: 1 2 0.50 sxth z0.s, p0/m, z0.s
+# CHECK-NEXT: 1 2 0.50 sxth z31.d, p7/m, z31.d
+# CHECK-NEXT: 1 2 0.50 sxth z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 2 0.50 sxtw z0.d, p0/m, z0.d
+# CHECK-NEXT: 1 2 0.50 sxtw z31.d, p7/m, z31.d
+# CHECK-NEXT: 1 2 0.50 tbl z28.b, { z29.b, z30.b }, z31.b
+# CHECK-NEXT: 1 2 0.50 tbl z28.d, { z29.d, z30.d }, z31.d
+# CHECK-NEXT: 1 2 0.50 tbl z28.h, { z29.h, z30.h }, z31.h
+# CHECK-NEXT: 1 2 0.50 tbl z28.s, { z29.s, z30.s }, z31.s
+# CHECK-NEXT: 1 2 0.50 tbl z31.b, { z31.b }, z31.b
+# CHECK-NEXT: 1 2 0.50 tbl z31.d, { z31.d }, z31.d
+# CHECK-NEXT: 1 2 0.50 tbl z31.h, { z31.h }, z31.h
+# CHECK-NEXT: 1 2 0.50 tbl z31.s, { z31.s }, z31.s
+# CHECK-NEXT: 1 2 0.50 tbx z31.b, z31.b, z31.b
+# CHECK-NEXT: 1 2 0.50 tbx z31.d, z31.d, z31.d
+# CHECK-NEXT: 1 2 0.50 tbx z31.h, z31.h, z31.h
+# CHECK-NEXT: 1 2 0.50 tbx z31.s, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.50 trn1 p15.b, p15.b, p15.b
+# CHECK-NEXT: 1 2 0.50 trn1 p15.d, p15.d, p15.d
+# CHECK-NEXT: 1 2 0.50 trn1 p15.h, p15.h, p15.h
+# CHECK-NEXT: 1 2 0.50 trn1 p15.s, p15.s, p15.s
+# CHECK-NEXT: 1 2 0.50 trn1 z31.b, z31.b, z31.b
+# CHECK-NEXT: 1 2 0.50 trn1 z31.d, z31.d, z31.d
+# CHECK-NEXT: 1 2 0.50 trn1 z31.h, z31.h, z31.h
+# CHECK-NEXT: 1 2 0.50 trn1 z31.s, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.50 trn2 p15.b, p15.b, p15.b
+# CHECK-NEXT: 1 2 0.50 trn2 p15.d, p15.d, p15.d
+# CHECK-NEXT: 1 2 0.50 trn2 p15.h, p15.h, p15.h
+# CHECK-NEXT: 1 2 0.50 trn2 p15.s, p15.s, p15.s
+# CHECK-NEXT: 1 2 0.50 trn2 z31.b, z31.b, z31.b
+# CHECK-NEXT: 1 2 0.50 trn2 z31.d, z31.d, z31.d
+# CHECK-NEXT: 1 2 0.50 trn2 z31.h, z31.h, z31.h
+# CHECK-NEXT: 1 2 0.50 trn2 z31.s, z31.s, z31.s
+# CHECK-NEXT: 1 4 0.50 uaba z0.b, z1.b, z31.b
+# CHECK-NEXT: 1 4 0.50 uaba z0.d, z1.d, z31.d
+# CHECK-NEXT: 1 4 0.50 uaba z0.h, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.50 uaba z0.s, z1.s, z31.s
+# CHECK-NEXT: 1 4 0.50 uabalb z0.d, z1.s, z31.s
+# CHECK-NEXT: 1 4 0.50 uabalb z0.h, z1.b, z31.b
+# CHECK-NEXT: 1 4 0.50 uabalb z0.s, z1.h, z31.h
+# CHECK-NEXT: 1 4 0.50 uabalt z0.d, z1.s, z31.s
+# CHECK-NEXT: 1 4 0.50 uabalt z0.h, z1.b, z31.b
+# CHECK-NEXT: 1 4 0.50 uabalt z0.s, z1.h, z31.h
+# CHECK-NEXT: 1 2 0.50 uabd z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: 1 2 0.50 uabd z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: 1 2 0.50 uabd z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: 1 2 0.50 uabd z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.50 uabdlb z0.h, z1.b, z2.b
+# CHECK-NEXT: 1 2 0.50 uabdlb z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 2 0.50 uabdlb z31.d, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.50 uabdlt z0.h, z1.b, z2.b
+# CHECK-NEXT: 1 2 0.50 uabdlt z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 2 0.50 uabdlt z31.d, z31.s, z31.s
+# CHECK-NEXT: 1 4 0.50 uadalp z0.h, p0/m, z1.b
+# CHECK-NEXT: 1 4 0.50 uadalp z29.s, p0/m, z30.h
+# CHECK-NEXT: 1 4 0.50 uadalp z30.d, p7/m, z31.s
+# CHECK-NEXT: 1 2 0.50 uaddlb z0.h, z1.b, z2.b
+# CHECK-NEXT: 1 2 0.50 uaddlb z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 2 0.50 uaddlb z31.d, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.50 uaddlt z0.h, z1.b, z2.b
+# CHECK-NEXT: 1 2 0.50 uaddlt z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 2 0.50 uaddlt z31.d, z31.s, z31.s
+# CHECK-NEXT: 6 9 4.00 uaddv d0, p7, z31.b
+# CHECK-NEXT: 2 4 1.00 uaddv d0, p7, z31.d
+# CHECK-NEXT: 4 8 2.00 uaddv d0, p7, z31.h
+# CHECK-NEXT: 4 6 2.00 uaddv d0, p7, z31.s
+# CHECK-NEXT: 1 2 0.50 uaddwb z0.h, z1.h, z2.b
+# CHECK-NEXT: 1 2 0.50 uaddwb z29.s, z30.s, z31.h
+# CHECK-NEXT: 1 2 0.50 uaddwb z31.d, z31.d, z31.s
+# CHECK-NEXT: 1 2 0.50 uaddwt z0.h, z1.h, z2.b
+# CHECK-NEXT: 1 2 0.50 uaddwt z29.s, z30.s, z31.h
+# CHECK-NEXT: 1 2 0.50 uaddwt z31.d, z31.d, z31.s
+# CHECK-NEXT: 1 3 1.00 ucvtf z0.d, p0/m, z0.d
+# CHECK-NEXT: 1 3 1.00 ucvtf z0.d, p0/m, z0.s
+# CHECK-NEXT: 1 3 1.00 ucvtf z0.h, p0/m, z0.d
+# CHECK-NEXT: 4 6 4.00 ucvtf z0.h, p0/m, z0.h
+# CHECK-NEXT: 2 4 2.00 ucvtf z0.h, p0/m, z0.s
+# CHECK-NEXT: 1 3 1.00 ucvtf z0.s, p0/m, z0.d
+# CHECK-NEXT: 2 4 2.00 ucvtf z0.s, p0/m, z0.s
+# CHECK-NEXT: 1 20 20.00 udiv z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 12 11.00 udiv z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 20 20.00 udivr z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 12 11.00 udivr z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 3 1.00 udot z0.d, z1.h, z15.h[1]
+# CHECK-NEXT: 1 3 1.00 udot z0.d, z1.h, z31.h
+# CHECK-NEXT: 1 3 0.50 udot z0.s, z1.b, z31.b
+# CHECK-NEXT: 1 3 0.50 udot z0.s, z1.b, z7.b[3]
+# CHECK-NEXT: 1 2 0.50 uhadd z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 2 0.50 uhadd z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.50 uhadd z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.50 uhadd z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 2 0.50 uhsub z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 2 0.50 uhsub z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.50 uhsub z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.50 uhsub z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 2 0.50 uhsubr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 2 0.50 uhsubr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.50 uhsubr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.50 uhsubr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 2 0.50 umax z0.b, z0.b, #0
+# CHECK-NEXT: 1 2 0.50 umax z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: 1 2 0.50 umax z31.b, z31.b, #255
+# CHECK-NEXT: 1 2 0.50 umax z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: 1 2 0.50 umax z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: 1 2 0.50 umax z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.50 umaxp z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 2 0.50 umaxp z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.50 umaxp z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.50 umaxp z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 6 9 4.00 umaxv b0, p7, z31.b
+# CHECK-NEXT: 2 4 1.00 umaxv d0, p7, z31.d
+# CHECK-NEXT: 4 8 2.00 umaxv h0, p7, z31.h
+# CHECK-NEXT: 4 6 2.00 umaxv s0, p7, z31.s
+# CHECK-NEXT: 1 2 0.50 umin z0.b, z0.b, #0
+# CHECK-NEXT: 1 2 0.50 umin z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: 1 2 0.50 umin z31.b, z31.b, #255
+# CHECK-NEXT: 1 2 0.50 umin z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: 1 2 0.50 umin z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: 1 2 0.50 umin z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.50 uminp z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 2 0.50 uminp z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.50 uminp z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.50 uminp z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 6 9 4.00 uminv b0, p7, z31.b
+# CHECK-NEXT: 2 4 1.00 uminv d0, p7, z31.d
+# CHECK-NEXT: 4 8 2.00 uminv h0, p7, z31.h
+# CHECK-NEXT: 4 6 2.00 uminv s0, p7, z31.s
+# CHECK-NEXT: 1 4 1.00 umlalb z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: 1 4 1.00 umlalb z0.d, z1.s, z31.s
+# CHECK-NEXT: 1 4 1.00 umlalb z0.h, z1.b, z31.b
+# CHECK-NEXT: 1 4 1.00 umlalb z0.s, z1.h, z31.h
+# CHECK-NEXT: 1 4 1.00 umlalb z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 1.00 umlalt z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: 1 4 1.00 umlalt z0.d, z1.s, z31.s
+# CHECK-NEXT: 1 4 1.00 umlalt z0.h, z1.b, z31.b
+# CHECK-NEXT: 1 4 1.00 umlalt z0.s, z1.h, z31.h
+# CHECK-NEXT: 1 4 1.00 umlalt z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 1.00 umlslb z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: 1 4 1.00 umlslb z0.d, z1.s, z31.s
+# CHECK-NEXT: 1 4 1.00 umlslb z0.h, z1.b, z31.b
+# CHECK-NEXT: 1 4 1.00 umlslb z0.s, z1.h, z31.h
+# CHECK-NEXT: 1 4 1.00 umlslb z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 1.00 umlslt z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: 1 4 1.00 umlslt z0.d, z1.s, z31.s
+# CHECK-NEXT: 1 4 1.00 umlslt z0.h, z1.b, z31.b
+# CHECK-NEXT: 1 4 1.00 umlslt z0.s, z1.h, z31.h
+# CHECK-NEXT: 1 4 1.00 umlslt z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: 1 3 0.50 ummla z0.s, z1.b, z2.b
+# CHECK-NEXT: 1 4 1.00 umulh z0.b, p7/m, z0.b, z31.b
+# CHECK-NEXT: 1 4 1.00 umulh z0.b, z1.b, z2.b
+# CHECK-NEXT: 2 5 2.00 umulh z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: 1 4 1.00 umulh z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: 1 4 1.00 umulh z0.h, z1.h, z2.h
+# CHECK-NEXT: 1 4 1.00 umulh z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: 1 4 1.00 umulh z29.s, z30.s, z31.s
+# CHECK-NEXT: 2 5 2.00 umulh z31.d, z31.d, z31.d
+# CHECK-NEXT: 1 4 1.00 umullb z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: 1 4 1.00 umullb z0.h, z1.b, z2.b
+# CHECK-NEXT: 1 4 1.00 umullb z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 1.00 umullb z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 4 1.00 umullb z31.d, z31.s, z31.s
+# CHECK-NEXT: 1 4 1.00 umullt z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: 1 4 1.00 umullt z0.h, z1.b, z2.b
+# CHECK-NEXT: 1 4 1.00 umullt z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: 1 4 1.00 umullt z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 4 1.00 umullt z31.d, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.50 uqadd z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 2 0.50 uqadd z0.b, z0.b, #0
+# CHECK-NEXT: 1 2 0.50 uqadd z0.b, z0.b, z0.b
+# CHECK-NEXT: 1 2 0.50 uqadd z0.d, z0.d, #0
+# CHECK-NEXT: 1 2 0.50 uqadd z0.d, z0.d, #0, lsl #8
+# CHECK-NEXT: 1 2 0.50 uqadd z0.d, z0.d, z0.d
+# CHECK-NEXT: 1 2 0.50 uqadd z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.50 uqadd z0.h, z0.h, #0
+# CHECK-NEXT: 1 2 0.50 uqadd z0.h, z0.h, #0, lsl #8
+# CHECK-NEXT: 1 2 0.50 uqadd z0.h, z0.h, z0.h
+# CHECK-NEXT: 1 2 0.50 uqadd z0.s, z0.s, #0
+# CHECK-NEXT: 1 2 0.50 uqadd z0.s, z0.s, #0, lsl #8
+# CHECK-NEXT: 1 2 0.50 uqadd z0.s, z0.s, z0.s
+# CHECK-NEXT: 1 2 0.50 uqadd z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.50 uqadd z31.b, z31.b, #255
+# CHECK-NEXT: 1 2 0.50 uqadd z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 2 0.50 uqadd z31.d, z31.d, #65280
+# CHECK-NEXT: 1 2 0.50 uqadd z31.h, z31.h, #65280
+# CHECK-NEXT: 1 2 0.50 uqadd z31.s, z31.s, #65280
+# CHECK-NEXT: 1 2 0.50 uqdecb w0
+# CHECK-NEXT: 1 2 0.50 uqdecb w0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqdecb w0, pow2
+# CHECK-NEXT: 1 2 0.50 uqdecb w0, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 uqdecb x0
+# CHECK-NEXT: 1 2 0.50 uqdecb x0, #14
+# CHECK-NEXT: 1 2 0.50 uqdecb x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqdecb x0, pow2
+# CHECK-NEXT: 1 2 0.50 uqdecb x0, vl1
+# CHECK-NEXT: 1 2 0.50 uqdecd w0
+# CHECK-NEXT: 1 2 0.50 uqdecd w0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqdecd w0, pow2
+# CHECK-NEXT: 1 2 0.50 uqdecd w0, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 uqdecd x0
+# CHECK-NEXT: 1 2 0.50 uqdecd x0, #14
+# CHECK-NEXT: 1 2 0.50 uqdecd x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqdecd x0, pow2
+# CHECK-NEXT: 1 2 0.50 uqdecd x0, vl1
+# CHECK-NEXT: 1 2 0.50 uqdecd z0.d
+# CHECK-NEXT: 1 2 0.50 uqdecd z0.d, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqdecd z0.d, pow2
+# CHECK-NEXT: 1 2 0.50 uqdecd z0.d, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 uqdech w0
+# CHECK-NEXT: 1 2 0.50 uqdech w0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqdech w0, pow2
+# CHECK-NEXT: 1 2 0.50 uqdech w0, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 uqdech x0
+# CHECK-NEXT: 1 2 0.50 uqdech x0, #14
+# CHECK-NEXT: 1 2 0.50 uqdech x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqdech x0, pow2
+# CHECK-NEXT: 1 2 0.50 uqdech x0, vl1
+# CHECK-NEXT: 1 2 0.50 uqdech z0.h
+# CHECK-NEXT: 1 2 0.50 uqdech z0.h, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqdech z0.h, pow2
+# CHECK-NEXT: 1 2 0.50 uqdech z0.h, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 uqdecp wzr, p15.b
+# CHECK-NEXT: 1 2 0.50 uqdecp wzr, p15.d
+# CHECK-NEXT: 1 2 0.50 uqdecp wzr, p15.h
+# CHECK-NEXT: 1 2 0.50 uqdecp wzr, p15.s
+# CHECK-NEXT: 1 2 0.50 uqdecp x0, p0.b
+# CHECK-NEXT: 1 2 0.50 uqdecp x0, p0.d
+# CHECK-NEXT: 1 2 0.50 uqdecp x0, p0.h
+# CHECK-NEXT: 1 2 0.50 uqdecp x0, p0.s
+# CHECK-NEXT: 3 7 1.00 uqdecp z0.d, p0.d
+# CHECK-NEXT: 3 7 1.00 uqdecp z0.h, p0.h
+# CHECK-NEXT: 3 7 1.00 uqdecp z0.s, p0.s
+# CHECK-NEXT: 1 2 0.50 uqdecw w0
+# CHECK-NEXT: 1 2 0.50 uqdecw w0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqdecw w0, pow2
+# CHECK-NEXT: 1 2 0.50 uqdecw w0, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 uqdecw x0
+# CHECK-NEXT: 1 2 0.50 uqdecw x0, #14
+# CHECK-NEXT: 1 2 0.50 uqdecw x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqdecw x0, pow2
+# CHECK-NEXT: 1 2 0.50 uqdecw x0, vl1
+# CHECK-NEXT: 1 2 0.50 uqdecw z0.s
+# CHECK-NEXT: 1 2 0.50 uqdecw z0.s, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqdecw z0.s, pow2
+# CHECK-NEXT: 1 2 0.50 uqdecw z0.s, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 uqincb w0
+# CHECK-NEXT: 1 2 0.50 uqincb w0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqincb w0, pow2
+# CHECK-NEXT: 1 2 0.50 uqincb w0, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 uqincb x0
+# CHECK-NEXT: 1 2 0.50 uqincb x0, #14
+# CHECK-NEXT: 1 2 0.50 uqincb x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqincb x0, pow2
+# CHECK-NEXT: 1 2 0.50 uqincb x0, vl1
+# CHECK-NEXT: 1 2 0.50 uqincd w0
+# CHECK-NEXT: 1 2 0.50 uqincd w0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqincd w0, pow2
+# CHECK-NEXT: 1 2 0.50 uqincd w0, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 uqincd x0
+# CHECK-NEXT: 1 2 0.50 uqincd x0, #14
+# CHECK-NEXT: 1 2 0.50 uqincd x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqincd x0, pow2
+# CHECK-NEXT: 1 2 0.50 uqincd x0, vl1
+# CHECK-NEXT: 1 2 0.50 uqincd z0.d
+# CHECK-NEXT: 1 2 0.50 uqincd z0.d, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqincd z0.d, pow2
+# CHECK-NEXT: 1 2 0.50 uqincd z0.d, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 uqinch w0
+# CHECK-NEXT: 1 2 0.50 uqinch w0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqinch w0, pow2
+# CHECK-NEXT: 1 2 0.50 uqinch w0, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 uqinch x0
+# CHECK-NEXT: 1 2 0.50 uqinch x0, #14
+# CHECK-NEXT: 1 2 0.50 uqinch x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqinch x0, pow2
+# CHECK-NEXT: 1 2 0.50 uqinch x0, vl1
+# CHECK-NEXT: 1 2 0.50 uqinch z0.h
+# CHECK-NEXT: 1 2 0.50 uqinch z0.h, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqinch z0.h, pow2
+# CHECK-NEXT: 1 2 0.50 uqinch z0.h, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 uqincp wzr, p15.b
+# CHECK-NEXT: 1 2 0.50 uqincp wzr, p15.d
+# CHECK-NEXT: 1 2 0.50 uqincp wzr, p15.h
+# CHECK-NEXT: 1 2 0.50 uqincp wzr, p15.s
+# CHECK-NEXT: 1 2 0.50 uqincp x0, p0.b
+# CHECK-NEXT: 1 2 0.50 uqincp x0, p0.d
+# CHECK-NEXT: 1 2 0.50 uqincp x0, p0.h
+# CHECK-NEXT: 1 2 0.50 uqincp x0, p0.s
+# CHECK-NEXT: 3 7 1.00 uqincp z0.d, p0.d
+# CHECK-NEXT: 3 7 1.00 uqincp z0.h, p0.h
+# CHECK-NEXT: 3 7 1.00 uqincp z0.s, p0.s
+# CHECK-NEXT: 1 2 0.50 uqincw w0
+# CHECK-NEXT: 1 2 0.50 uqincw w0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqincw w0, pow2
+# CHECK-NEXT: 1 2 0.50 uqincw w0, pow2, mul #16
+# CHECK-NEXT: 1 2 0.50 uqincw x0
+# CHECK-NEXT: 1 2 0.50 uqincw x0, #14
+# CHECK-NEXT: 1 2 0.50 uqincw x0, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqincw x0, pow2
+# CHECK-NEXT: 1 2 0.50 uqincw x0, vl1
+# CHECK-NEXT: 1 2 0.50 uqincw z0.s
+# CHECK-NEXT: 1 2 0.50 uqincw z0.s, all, mul #16
+# CHECK-NEXT: 1 2 0.50 uqincw z0.s, pow2
+# CHECK-NEXT: 1 2 0.50 uqincw z0.s, pow2, mul #16
+# CHECK-NEXT: 1 4 0.50 uqrshl z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 4 0.50 uqrshl z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 4 0.50 uqrshl z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 4 0.50 uqrshl z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 4 0.50 uqrshlr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 4 0.50 uqrshlr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 4 0.50 uqrshlr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 4 0.50 uqrshlr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 4 0.50 uqrshrnb z0.b, z0.h, #1
+# CHECK-NEXT: 1 4 0.50 uqrshrnb z0.h, z0.s, #1
+# CHECK-NEXT: 1 4 0.50 uqrshrnb z0.s, z0.d, #1
+# CHECK-NEXT: 1 4 0.50 uqrshrnb z31.b, z31.h, #8
+# CHECK-NEXT: 1 4 0.50 uqrshrnb z31.h, z31.s, #16
+# CHECK-NEXT: 1 4 0.50 uqrshrnb z31.s, z31.d, #32
+# CHECK-NEXT: 1 4 0.50 uqrshrnt z0.b, z0.h, #1
+# CHECK-NEXT: 1 4 0.50 uqrshrnt z0.h, z0.s, #1
+# CHECK-NEXT: 1 4 0.50 uqrshrnt z0.s, z0.d, #1
+# CHECK-NEXT: 1 4 0.50 uqrshrnt z31.b, z31.h, #8
+# CHECK-NEXT: 1 4 0.50 uqrshrnt z31.h, z31.s, #16
+# CHECK-NEXT: 1 4 0.50 uqrshrnt z31.s, z31.d, #32
+# CHECK-NEXT: 1 4 0.50 uqshl z0.b, p0/m, z0.b, #0
+# CHECK-NEXT: 1 4 0.50 uqshl z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 4 0.50 uqshl z0.d, p0/m, z0.d, #0
+# CHECK-NEXT: 1 4 0.50 uqshl z0.h, p0/m, z0.h, #0
+# CHECK-NEXT: 1 4 0.50 uqshl z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 4 0.50 uqshl z0.s, p0/m, z0.s, #0
+# CHECK-NEXT: 1 4 0.50 uqshl z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 4 0.50 uqshl z31.b, p0/m, z31.b, #7
+# CHECK-NEXT: 1 4 0.50 uqshl z31.d, p0/m, z31.d, #63
+# CHECK-NEXT: 1 4 0.50 uqshl z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 4 0.50 uqshl z31.h, p0/m, z31.h, #15
+# CHECK-NEXT: 1 4 0.50 uqshl z31.s, p0/m, z31.s, #31
+# CHECK-NEXT: 1 4 0.50 uqshlr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 4 0.50 uqshlr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 4 0.50 uqshlr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 4 0.50 uqshlr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 4 0.50 uqshrnb z0.b, z0.h, #1
+# CHECK-NEXT: 1 4 0.50 uqshrnb z0.h, z0.s, #1
+# CHECK-NEXT: 1 4 0.50 uqshrnb z0.s, z0.d, #1
+# CHECK-NEXT: 1 4 0.50 uqshrnb z31.b, z31.h, #8
+# CHECK-NEXT: 1 4 0.50 uqshrnb z31.h, z31.s, #16
+# CHECK-NEXT: 1 4 0.50 uqshrnb z31.s, z31.d, #32
+# CHECK-NEXT: 1 4 0.50 uqshrnt z0.b, z0.h, #1
+# CHECK-NEXT: 1 4 0.50 uqshrnt z0.h, z0.s, #1
+# CHECK-NEXT: 1 4 0.50 uqshrnt z0.s, z0.d, #1
+# CHECK-NEXT: 1 4 0.50 uqshrnt z31.b, z31.h, #8
+# CHECK-NEXT: 1 4 0.50 uqshrnt z31.h, z31.s, #16
+# CHECK-NEXT: 1 4 0.50 uqshrnt z31.s, z31.d, #32
+# CHECK-NEXT: 1 2 0.50 uqsub z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 2 0.50 uqsub z0.b, z0.b, #0
+# CHECK-NEXT: 1 2 0.50 uqsub z0.b, z0.b, z0.b
+# CHECK-NEXT: 1 2 0.50 uqsub z0.d, z0.d, #0
+# CHECK-NEXT: 1 2 0.50 uqsub z0.d, z0.d, #0, lsl #8
+# CHECK-NEXT: 1 2 0.50 uqsub z0.d, z0.d, z0.d
+# CHECK-NEXT: 1 2 0.50 uqsub z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.50 uqsub z0.h, z0.h, #0
+# CHECK-NEXT: 1 2 0.50 uqsub z0.h, z0.h, #0, lsl #8
+# CHECK-NEXT: 1 2 0.50 uqsub z0.h, z0.h, z0.h
+# CHECK-NEXT: 1 2 0.50 uqsub z0.s, z0.s, #0
+# CHECK-NEXT: 1 2 0.50 uqsub z0.s, z0.s, #0, lsl #8
+# CHECK-NEXT: 1 2 0.50 uqsub z0.s, z0.s, z0.s
+# CHECK-NEXT: 1 2 0.50 uqsub z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.50 uqsub z31.b, z31.b, #255
+# CHECK-NEXT: 1 2 0.50 uqsub z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 2 0.50 uqsub z31.d, z31.d, #65280
+# CHECK-NEXT: 1 2 0.50 uqsub z31.h, z31.h, #65280
+# CHECK-NEXT: 1 2 0.50 uqsub z31.s, z31.s, #65280
+# CHECK-NEXT: 1 2 0.50 uqsubr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 2 0.50 uqsubr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.50 uqsubr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.50 uqsubr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 4 0.50 uqxtnb z0.b, z31.h
+# CHECK-NEXT: 1 4 0.50 uqxtnb z0.h, z31.s
+# CHECK-NEXT: 1 4 0.50 uqxtnb z0.s, z31.d
+# CHECK-NEXT: 1 4 0.50 uqxtnt z0.b, z31.h
+# CHECK-NEXT: 1 4 0.50 uqxtnt z0.h, z31.s
+# CHECK-NEXT: 1 4 0.50 uqxtnt z0.s, z31.d
+# CHECK-NEXT: 2 4 2.00 urecpe z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 2 0.50 urhadd z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 2 0.50 urhadd z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.50 urhadd z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.50 urhadd z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 4 0.50 urshl z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 4 0.50 urshl z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 4 0.50 urshl z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 4 0.50 urshl z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 4 0.50 urshlr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 4 0.50 urshlr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 4 0.50 urshlr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 4 0.50 urshlr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 4 0.50 urshr z0.b, p0/m, z0.b, #1
+# CHECK-NEXT: 1 4 0.50 urshr z0.d, p0/m, z0.d, #1
+# CHECK-NEXT: 1 4 0.50 urshr z0.h, p0/m, z0.h, #1
+# CHECK-NEXT: 1 4 0.50 urshr z0.s, p0/m, z0.s, #1
+# CHECK-NEXT: 1 4 0.50 urshr z31.b, p0/m, z31.b, #8
+# CHECK-NEXT: 1 4 0.50 urshr z31.d, p0/m, z31.d, #64
+# CHECK-NEXT: 1 4 0.50 urshr z31.h, p0/m, z31.h, #16
+# CHECK-NEXT: 1 4 0.50 urshr z31.s, p0/m, z31.s, #32
+# CHECK-NEXT: 2 4 2.00 ursqrte z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 4 1.00 ursra z0.b, z0.b, #1
+# CHECK-NEXT: 1 4 1.00 ursra z0.d, z0.d, #1
+# CHECK-NEXT: 1 4 1.00 ursra z0.h, z0.h, #1
+# CHECK-NEXT: 1 4 1.00 ursra z0.s, z0.s, #1
+# CHECK-NEXT: 1 4 1.00 ursra z31.b, z31.b, #8
+# CHECK-NEXT: 1 4 1.00 ursra z31.d, z31.d, #64
+# CHECK-NEXT: 1 4 1.00 ursra z31.h, z31.h, #16
+# CHECK-NEXT: 1 4 1.00 ursra z31.s, z31.s, #32
+# CHECK-NEXT: 1 2 0.50 ushllb z0.d, z0.s, #0
+# CHECK-NEXT: 1 2 0.50 ushllb z0.h, z0.b, #0
+# CHECK-NEXT: 1 2 0.50 ushllb z0.s, z0.h, #0
+# CHECK-NEXT: 1 2 0.50 ushllb z31.d, z31.s, #31
+# CHECK-NEXT: 1 2 0.50 ushllb z31.h, z31.b, #7
+# CHECK-NEXT: 1 2 0.50 ushllb z31.s, z31.h, #15
+# CHECK-NEXT: 1 2 0.50 ushllt z0.d, z0.s, #0
+# CHECK-NEXT: 1 2 0.50 ushllt z0.h, z0.b, #0
+# CHECK-NEXT: 1 2 0.50 ushllt z0.s, z0.h, #0
+# CHECK-NEXT: 1 2 0.50 ushllt z31.d, z31.s, #31
+# CHECK-NEXT: 1 2 0.50 ushllt z31.h, z31.b, #7
+# CHECK-NEXT: 1 2 0.50 ushllt z31.s, z31.h, #15
+# CHECK-NEXT: 1 3 0.50 usmmla z0.s, z1.b, z2.b
+# CHECK-NEXT: 1 2 0.50 usqadd z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: 1 2 0.50 usqadd z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: 1 2 0.50 usqadd z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: 1 2 0.50 usqadd z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: 1 4 1.00 usra z0.b, z0.b, #1
+# CHECK-NEXT: 1 4 1.00 usra z0.d, z0.d, #1
+# CHECK-NEXT: 1 4 1.00 usra z0.h, z0.h, #1
+# CHECK-NEXT: 1 4 1.00 usra z0.s, z0.s, #1
+# CHECK-NEXT: 1 4 1.00 usra z31.b, z31.b, #8
+# CHECK-NEXT: 1 4 1.00 usra z31.d, z31.d, #64
+# CHECK-NEXT: 1 4 1.00 usra z31.h, z31.h, #16
+# CHECK-NEXT: 1 4 1.00 usra z31.s, z31.s, #32
+# CHECK-NEXT: 1 2 0.50 usublb z0.h, z1.b, z2.b
+# CHECK-NEXT: 1 2 0.50 usublb z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 2 0.50 usublb z31.d, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.50 usublt z0.h, z1.b, z2.b
+# CHECK-NEXT: 1 2 0.50 usublt z29.s, z30.h, z31.h
+# CHECK-NEXT: 1 2 0.50 usublt z31.d, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.50 usubwb z0.h, z1.h, z2.b
+# CHECK-NEXT: 1 2 0.50 usubwb z29.s, z30.s, z31.h
+# CHECK-NEXT: 1 2 0.50 usubwb z31.d, z31.d, z31.s
+# CHECK-NEXT: 1 2 0.50 usubwt z0.h, z1.h, z2.b
+# CHECK-NEXT: 1 2 0.50 usubwt z29.s, z30.s, z31.h
+# CHECK-NEXT: 1 2 0.50 usubwt z31.d, z31.d, z31.s
+# CHECK-NEXT: 1 2 0.50 uunpkhi z31.d, z31.s
+# CHECK-NEXT: 1 2 0.50 uunpkhi z31.h, z31.b
+# CHECK-NEXT: 1 2 0.50 uunpkhi z31.s, z31.h
+# CHECK-NEXT: 1 2 0.50 uunpklo z31.d, z31.s
+# CHECK-NEXT: 1 2 0.50 uunpklo z31.h, z31.b
+# CHECK-NEXT: 1 2 0.50 uunpklo z31.s, z31.h
+# CHECK-NEXT: 1 2 0.50 uxtb z0.d, p0/m, z0.d
+# CHECK-NEXT: 1 2 0.50 uxtb z0.h, p0/m, z0.h
+# CHECK-NEXT: 1 2 0.50 uxtb z0.s, p0/m, z0.s
+# CHECK-NEXT: 1 2 0.50 uxtb z31.d, p7/m, z31.d
+# CHECK-NEXT: 1 2 0.50 uxtb z31.h, p7/m, z31.h
+# CHECK-NEXT: 1 2 0.50 uxtb z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 2 0.50 uxth z0.d, p0/m, z0.d
+# CHECK-NEXT: 1 2 0.50 uxth z0.s, p0/m, z0.s
+# CHECK-NEXT: 1 2 0.50 uxth z31.d, p7/m, z31.d
+# CHECK-NEXT: 1 2 0.50 uxth z31.s, p7/m, z31.s
+# CHECK-NEXT: 1 2 0.50 uxtw z0.d, p0/m, z0.d
+# CHECK-NEXT: 1 2 0.50 uxtw z31.d, p7/m, z31.d
+# CHECK-NEXT: 1 2 0.50 uzp1 p15.b, p15.b, p15.b
+# CHECK-NEXT: 1 2 0.50 uzp1 p15.d, p15.d, p15.d
+# CHECK-NEXT: 1 2 0.50 uzp1 p15.h, p15.h, p15.h
+# CHECK-NEXT: 1 2 0.50 uzp1 p15.s, p15.s, p15.s
+# CHECK-NEXT: 1 2 0.50 uzp1 z31.b, z31.b, z31.b
+# CHECK-NEXT: 1 2 0.50 uzp1 z31.d, z31.d, z31.d
+# CHECK-NEXT: 1 2 0.50 uzp1 z31.h, z31.h, z31.h
+# CHECK-NEXT: 1 2 0.50 uzp1 z31.s, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.50 uzp2 p15.b, p15.b, p15.b
+# CHECK-NEXT: 1 2 0.50 uzp2 p15.d, p15.d, p15.d
+# CHECK-NEXT: 1 2 0.50 uzp2 p15.h, p15.h, p15.h
+# CHECK-NEXT: 1 2 0.50 uzp2 p15.s, p15.s, p15.s
+# CHECK-NEXT: 1 2 0.50 uzp2 z31.b, z31.b, z31.b
+# CHECK-NEXT: 1 2 0.50 uzp2 z31.d, z31.d, z31.d
+# CHECK-NEXT: 1 2 0.50 uzp2 z31.h, z31.h, z31.h
+# CHECK-NEXT: 1 2 0.50 uzp2 z31.s, z31.s, z31.s
+# CHECK-NEXT: 2 3 1.00 whilege p15.b, w0, wzr
+# CHECK-NEXT: 2 3 1.00 whilege p15.b, wzr, w0
+# CHECK-NEXT: 2 3 1.00 whilege p15.b, x0, xzr
+# CHECK-NEXT: 2 3 1.00 whilege p15.b, xzr, x0
+# CHECK-NEXT: 2 3 1.00 whilege p15.d, w0, wzr
+# CHECK-NEXT: 2 3 1.00 whilege p15.d, x0, xzr
+# CHECK-NEXT: 2 3 1.00 whilege p15.h, w0, wzr
+# CHECK-NEXT: 2 3 1.00 whilege p15.h, x0, xzr
+# CHECK-NEXT: 2 3 1.00 whilege p15.s, w0, wzr
+# CHECK-NEXT: 2 3 1.00 whilege p15.s, x0, xzr
+# CHECK-NEXT: 2 3 1.00 whilerw p15.b, x30, x30
+# CHECK-NEXT: 2 3 1.00 whilerw p15.d, x30, x30
+# CHECK-NEXT: 2 3 1.00 whilerw p15.h, x30, x30
+# CHECK-NEXT: 2 3 1.00 whilerw p15.s, x30, x30
+# CHECK-NEXT: 2 3 1.00 whilewr p15.b, x30, x30
+# CHECK-NEXT: 2 3 1.00 whilewr p15.d, x30, x30
+# CHECK-NEXT: 2 3 1.00 whilewr p15.h, x30, x30
+# CHECK-NEXT: 2 3 1.00 whilewr p15.s, x30, x30
+# CHECK-NEXT: 1 2 1.00 * U wrffr p0.b
+# CHECK-NEXT: 1 2 1.00 * U wrffr p15.b
+# CHECK-NEXT: 1 2 0.50 xar z0.b, z0.b, z1.b, #1
+# CHECK-NEXT: 1 2 0.50 xar z0.d, z0.d, z1.d, #1
+# CHECK-NEXT: 1 2 0.50 xar z0.h, z0.h, z1.h, #1
+# CHECK-NEXT: 1 2 0.50 xar z0.s, z0.s, z1.s, #1
+# CHECK-NEXT: 1 2 0.50 xar z31.b, z31.b, z30.b, #8
+# CHECK-NEXT: 1 2 0.50 xar z31.d, z31.d, z30.d, #64
+# CHECK-NEXT: 1 2 0.50 xar z31.h, z31.h, z30.h, #16
+# CHECK-NEXT: 1 2 0.50 xar z31.s, z31.s, z30.s, #32
+# CHECK-NEXT: 1 2 0.50 zip1 p0.b, p0.b, p0.b
+# CHECK-NEXT: 1 2 0.50 zip1 p0.d, p0.d, p0.d
+# CHECK-NEXT: 1 2 0.50 zip1 p0.h, p0.h, p0.h
+# CHECK-NEXT: 1 2 0.50 zip1 p0.s, p0.s, p0.s
+# CHECK-NEXT: 1 2 0.50 zip1 p15.b, p15.b, p15.b
+# CHECK-NEXT: 1 2 0.50 zip1 p15.d, p15.d, p15.d
+# CHECK-NEXT: 1 2 0.50 zip1 p15.h, p15.h, p15.h
+# CHECK-NEXT: 1 2 0.50 zip1 p15.s, p15.s, p15.s
+# CHECK-NEXT: 1 2 0.50 zip1 z0.b, z0.b, z0.b
+# CHECK-NEXT: 1 2 0.50 zip1 z0.d, z0.d, z0.d
+# CHECK-NEXT: 1 2 0.50 zip1 z0.h, z0.h, z0.h
+# CHECK-NEXT: 1 2 0.50 zip1 z0.s, z0.s, z0.s
+# CHECK-NEXT: 1 2 0.50 zip1 z31.b, z31.b, z31.b
+# CHECK-NEXT: 1 2 0.50 zip1 z31.d, z31.d, z31.d
+# CHECK-NEXT: 1 2 0.50 zip1 z31.h, z31.h, z31.h
+# CHECK-NEXT: 1 2 0.50 zip1 z31.s, z31.s, z31.s
+# CHECK-NEXT: 1 2 0.50 zip2 p0.b, p0.b, p0.b
+# CHECK-NEXT: 1 2 0.50 zip2 p0.d, p0.d, p0.d
+# CHECK-NEXT: 1 2 0.50 zip2 p0.h, p0.h, p0.h
+# CHECK-NEXT: 1 2 0.50 zip2 p0.s, p0.s, p0.s
+# CHECK-NEXT: 1 2 0.50 zip2 p15.b, p15.b, p15.b
+# CHECK-NEXT: 1 2 0.50 zip2 p15.d, p15.d, p15.d
+# CHECK-NEXT: 1 2 0.50 zip2 p15.h, p15.h, p15.h
+# CHECK-NEXT: 1 2 0.50 zip2 p15.s, p15.s, p15.s
+# CHECK-NEXT: 1 2 0.50 zip2 z0.b, z0.b, z0.b
+# CHECK-NEXT: 1 2 0.50 zip2 z0.d, z0.d, z0.d
+# CHECK-NEXT: 1 2 0.50 zip2 z0.h, z0.h, z0.h
+# CHECK-NEXT: 1 2 0.50 zip2 z0.s, z0.s, z0.s
+# CHECK-NEXT: 1 2 0.50 zip2 z31.b, z31.b, z31.b
+# CHECK-NEXT: 1 2 0.50 zip2 z31.d, z31.d, z31.d
+# CHECK-NEXT: 1 2 0.50 zip2 z31.h, z31.h, z31.h
+# CHECK-NEXT: 1 2 0.50 zip2 z31.s, z31.s, z31.s
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - V3AEUnitB
+# CHECK-NEXT: [0.1] - V3AEUnitB
+# CHECK-NEXT: [0.2] - V3AEUnitB
+# CHECK-NEXT: [1.0] - V3AEUnitD
+# CHECK-NEXT: [1.1] - V3AEUnitD
+# CHECK-NEXT: [2.0] - V3AEUnitFlg
+# CHECK-NEXT: [2.1] - V3AEUnitFlg
+# CHECK-NEXT: [2.2] - V3AEUnitFlg
+# CHECK-NEXT: [2.3] - V3AEUnitFlg
+# CHECK-NEXT: [3.0] - V3AEUnitL12
+# CHECK-NEXT: [3.1] - V3AEUnitL12
+# CHECK-NEXT: [4] - V3AEUnitLS0
+# CHECK-NEXT: [5] - V3AEUnitM0
+# CHECK-NEXT: [6] - V3AEUnitM1
+# CHECK-NEXT: [7] - V3AEUnitS0
+# CHECK-NEXT: [8] - V3AEUnitS1
+# CHECK-NEXT: [9] - V3AEUnitS2
+# CHECK-NEXT: [10] - V3AEUnitS3
+# CHECK-NEXT: [11] - V3AEUnitS4
+# CHECK-NEXT: [12] - V3AEUnitS5
+# CHECK-NEXT: [13] - V3AEUnitST1
+# CHECK-NEXT: [14] - V3AEUnitV0
+# CHECK-NEXT: [15] - V3AEUnitV1
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15]
+# CHECK-NEXT: - - - - - - - - - 245.00 245.00 651.00 400.88 303.88 41.88 41.88 41.88 41.88 41.88 41.88 406.00 2376.50 1905.50
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] Instructions:
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 abs z0.b, p0/m, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 abs z0.d, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 abs z0.h, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 abs z0.s, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 abs z31.b, p7/m, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 abs z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 abs z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 abs z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 adclb z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 adclb z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 adclt z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 adclt z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z0.b, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z0.b, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z0.d, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z0.d, z0.d, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z0.d, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z0.h, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z0.h, z0.h, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z0.h, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z0.s, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z0.s, z0.s, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z0.s, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z0.s, z1.s, z2.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z21.b, p5/m, z21.b, z10.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z21.b, z10.b, z21.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z21.d, p5/m, z21.d, z10.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z21.d, z10.d, z21.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z21.h, p5/m, z21.h, z10.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z21.h, z10.h, z21.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z21.s, p5/m, z21.s, z10.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z21.s, z10.s, z21.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z23.b, p3/m, z23.b, z13.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z23.b, z13.b, z8.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z23.d, p3/m, z23.d, z13.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z23.d, z13.d, z8.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z23.h, p3/m, z23.h, z13.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z23.h, z13.h, z8.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z23.s, p3/m, z23.s, z13.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z23.s, z13.s, z8.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z31.b, z31.b, #255
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z31.b, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z31.d, z31.d, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z31.d, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z31.h, z31.h, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z31.h, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z31.s, z31.s, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 add z31.s, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 addhnb z0.b, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 addhnb z0.h, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 addhnb z0.s, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 addhnt z0.b, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 addhnt z0.h, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 addhnt z0.s, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 addp z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 addp z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 addp z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 addp z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - addpl sp, sp, #31
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - addpl x0, x0, #-32
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - addpl x21, x21, #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - addpl x23, x8, #-1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - addvl sp, sp, #31
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - addvl x0, x0, #-32
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - addvl x21, x21, #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - addvl x23, x8, #-1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 adr z0.d, [z0.d, z0.d, lsl #1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 adr z0.d, [z0.d, z0.d, lsl #2]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 adr z0.d, [z0.d, z0.d, lsl #3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 adr z0.d, [z0.d, z0.d, sxtw #1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 adr z0.d, [z0.d, z0.d, sxtw #2]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 adr z0.d, [z0.d, z0.d, sxtw #3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 adr z0.d, [z0.d, z0.d, sxtw]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 adr z0.d, [z0.d, z0.d, uxtw #1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 adr z0.d, [z0.d, z0.d, uxtw #2]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 adr z0.d, [z0.d, z0.d, uxtw #3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 adr z0.d, [z0.d, z0.d, uxtw]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 adr z0.d, [z0.d, z0.d]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 adr z0.s, [z0.s, z0.s, lsl #1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 adr z0.s, [z0.s, z0.s, lsl #2]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 adr z0.s, [z0.s, z0.s, lsl #3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 adr z0.s, [z0.s, z0.s]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 aesd z0.b, z0.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 aese z0.b, z0.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 aesimc z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 aesimc z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 aesmc z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 aesmc z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - and p0.b, p0/z, p0.b, p1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 and z0.d, z0.d, #0x6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 and z0.d, z0.d, #0xfffffffffffffff9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 and z0.d, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 and z0.s, z0.s, #0x6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 and z0.s, z0.s, #0xfffffff9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 and z23.d, z13.d, z8.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 and z23.h, z23.h, #0x6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 and z23.h, z23.h, #0xfff9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 and z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 and z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 and z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 and z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 and z5.b, z5.b, #0x6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 and z5.b, z5.b, #0xf9
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ands p0.b, p0/z, p0.b, p1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 1.50 andv b0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 1.50 andv d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 1.50 andv h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 1.50 andv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 asr z0.b, p0/m, z0.b, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 asr z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 asr z0.b, p0/m, z0.b, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 asr z0.b, z0.b, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 asr z0.b, z1.b, z2.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 asr z0.d, p0/m, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 asr z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 asr z0.d, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 asr z0.h, p0/m, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 asr z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 asr z0.h, p0/m, z0.h, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 asr z0.h, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 asr z0.h, z1.h, z2.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 asr z0.s, p0/m, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 asr z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 asr z0.s, p0/m, z0.s, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 asr z0.s, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 asr z0.s, z1.s, z2.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 asr z31.b, p0/m, z31.b, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 asr z31.b, z31.b, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 asr z31.d, p0/m, z31.d, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 asr z31.d, z31.d, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 asr z31.h, p0/m, z31.h, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 asr z31.h, z31.h, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 asr z31.s, p0/m, z31.s, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 asr z31.s, z31.s, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 asrd z0.b, p0/m, z0.b, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 asrd z0.d, p0/m, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 asrd z0.h, p0/m, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 asrd z0.s, p0/m, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 asrd z31.b, p0/m, z31.b, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 asrd z31.d, p0/m, z31.d, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 asrd z31.h, p0/m, z31.h, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 asrd z31.s, p0/m, z31.s, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 asrr z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 asrr z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 asrr z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 asrr z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 bcax z29.d, z29.d, z30.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 bdep z0.b, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 bdep z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 bdep z0.h, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 bdep z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 bext z0.b, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 bext z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 bext z0.h, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 bext z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 bfcvt z0.h, p0/m, z1.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 bfcvtnt z0.h, p0/m, z1.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 bfdot z0.s, z1.h, z2.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 bfdot z0.s, z1.h, z2.h[0]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 bfdot z0.s, z1.h, z2.h[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 bfmlalb z0.s, z1.h, z2.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 bfmlalb z0.s, z1.h, z2.h[0]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 bfmlalb z0.s, z1.h, z2.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 bfmlalb z10.s, z21.h, z14.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 bfmlalb z21.s, z14.h, z3.h[2]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 bfmlalt z0.s, z1.h, z2.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 bfmlalt z0.s, z1.h, z2.h[0]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 bfmlalt z0.s, z1.h, z2.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 bfmlalt z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 bfmlalt z14.s, z10.h, z21.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 bfmmla z0.s, z1.h, z2.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 bgrp z0.b, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 bgrp z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 bgrp z0.h, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 bgrp z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - bic p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - bic p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 bic z0.d, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 bic z23.d, z13.d, z8.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 bic z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 bic z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 bic z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 bic z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - bics p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - bics p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - brka p0.b, p15/m, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - brka p0.b, p15/z, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - brkas p0.b, p15/z, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - brkb p0.b, p15/m, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - brkb p0.b, p15/z, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - brkbs p0.b, p15/z, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - brkn p0.b, p15/z, p1.b, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - brkn p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - brkns p0.b, p15/z, p1.b, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - brkns p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - brkpa p0.b, p15/z, p1.b, p2.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - brkpa p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - brkpas p0.b, p15/z, p1.b, p2.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - brkpas p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - brkpb p0.b, p15/z, p1.b, p2.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - brkpb p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - brkpbs p0.b, p15/z, p1.b, p2.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - brkpbs p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 bsl z0.d, z0.d, z1.d, z2.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 bsl1n z0.d, z0.d, z1.d, z2.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 bsl2n z0.d, z0.d, z1.d, z2.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cadd z0.b, z0.b, z0.b, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cadd z0.d, z0.d, z0.d, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cadd z0.h, z0.h, z0.h, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cadd z0.s, z0.s, z0.s, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cadd z31.b, z31.b, z31.b, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cadd z31.d, z31.d, z31.d, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cadd z31.h, z31.h, z31.h, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cadd z31.s, z31.s, z31.s, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cdot z0.d, z1.h, z15.h[1], #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cdot z0.d, z1.h, z31.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cdot z0.d, z1.h, z31.h, #180
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cdot z0.d, z1.h, z31.h, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cdot z0.d, z1.h, z31.h, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cdot z0.s, z1.b, z31.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cdot z0.s, z1.b, z7.b[3], #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cdot z29.d, z30.h, z0.h[0], #180
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cdot z31.d, z30.h, z7.h[1], #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cdot z5.d, z6.h, z3.h[0], #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 clasta b0, p7, b0, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 clasta d0, p7, d0, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 clasta h0, p7, h0, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 clasta s0, p7, s0, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 0.50 clasta w0, p7, w0, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 0.50 clasta w0, p7, w0, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 0.50 clasta w0, p7, w0, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 0.50 clasta x0, p7, x0, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 clasta z0.b, p7, z0.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 clasta z0.d, p7, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 clasta z0.h, p7, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 clasta z0.s, p7, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 clastb b0, p7, b0, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 clastb d0, p7, d0, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 clastb h0, p7, h0, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 clastb s0, p7, s0, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 0.50 clastb w0, p7, w0, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 0.50 clastb w0, p7, w0, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 0.50 clastb w0, p7, w0, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 0.50 clastb x0, p7, x0, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 clastb z0.b, p7, z0.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 clastb z0.d, p7, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 clastb z0.h, p7, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 clastb z0.s, p7, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cls z31.b, p7/m, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cls z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cls z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cls z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 clz z31.b, p7/m, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 clz z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 clz z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 clz z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmla z0.b, z1.b, z2.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - cmla z0.d, z1.d, z2.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmla z0.h, z1.h, z2.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmla z0.h, z1.h, z2.h[0], #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmla z0.s, z1.s, z2.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmla z0.s, z1.s, z2.s[0], #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmla z15.b, z16.b, z17.b, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - cmla z15.d, z16.d, z17.d, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmla z15.h, z16.h, z17.h, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmla z15.s, z16.s, z17.s, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmla z29.b, z30.b, z31.b, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - cmla z29.d, z30.d, z31.d, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmla z29.h, z30.h, z31.h, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmla z29.s, z30.s, z31.s, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmla z31.b, z31.b, z31.b, #180
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - cmla z31.d, z31.d, z31.d, #180
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmla z31.h, z30.h, z7.h[0], #180
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmla z31.h, z31.h, z31.h, #180
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmla z31.s, z30.s, z7.s[0], #180
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmla z31.s, z31.s, z31.s, #180
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpeq p0.b, p0/z, z0.b, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpeq p0.b, p0/z, z0.b, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpeq p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpeq p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpeq p0.d, p0/z, z0.d, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpeq p0.d, p0/z, z0.d, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpeq p0.d, p0/z, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpeq p0.h, p0/z, z0.h, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpeq p0.h, p0/z, z0.h, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpeq p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpeq p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpeq p0.s, p0/z, z0.s, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpeq p0.s, p0/z, z0.s, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpeq p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpeq p0.s, p0/z, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpge p0.b, p0/z, z0.b, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpge p0.b, p0/z, z0.b, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpge p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpge p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpge p0.b, p0/z, z1.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpge p0.d, p0/z, z0.d, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpge p0.d, p0/z, z0.d, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpge p0.d, p0/z, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpge p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpge p0.h, p0/z, z0.h, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpge p0.h, p0/z, z0.h, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpge p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpge p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpge p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpge p0.s, p0/z, z0.s, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpge p0.s, p0/z, z0.s, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpge p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpge p0.s, p0/z, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpge p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpgt p0.b, p0/z, z0.b, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpgt p0.b, p0/z, z0.b, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpgt p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpgt p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpgt p0.b, p0/z, z1.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpgt p0.d, p0/z, z0.d, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpgt p0.d, p0/z, z0.d, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpgt p0.d, p0/z, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpgt p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpgt p0.h, p0/z, z0.h, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpgt p0.h, p0/z, z0.h, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpgt p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpgt p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpgt p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpgt p0.s, p0/z, z0.s, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpgt p0.s, p0/z, z0.s, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpgt p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpgt p0.s, p0/z, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpgt p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmphi p0.b, p0/z, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmphi p0.b, p0/z, z0.b, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmphi p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmphi p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmphi p0.b, p0/z, z1.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmphi p0.d, p0/z, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmphi p0.d, p0/z, z0.d, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmphi p0.d, p0/z, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmphi p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmphi p0.h, p0/z, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmphi p0.h, p0/z, z0.h, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmphi p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmphi p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmphi p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmphi p0.s, p0/z, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmphi p0.s, p0/z, z0.s, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmphi p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmphi p0.s, p0/z, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmphi p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmphs p0.b, p0/z, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmphs p0.b, p0/z, z0.b, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmphs p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmphs p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmphs p0.b, p0/z, z1.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmphs p0.d, p0/z, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmphs p0.d, p0/z, z0.d, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmphs p0.d, p0/z, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmphs p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmphs p0.h, p0/z, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmphs p0.h, p0/z, z0.h, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmphs p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmphs p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmphs p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmphs p0.s, p0/z, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmphs p0.s, p0/z, z0.s, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmphs p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmphs p0.s, p0/z, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmphs p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmple p0.b, p0/z, z0.b, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmple p0.b, p0/z, z0.b, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmple p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmple p0.d, p0/z, z0.d, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmple p0.d, p0/z, z0.d, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmple p0.h, p0/z, z0.h, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmple p0.h, p0/z, z0.h, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmple p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmple p0.s, p0/z, z0.s, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmple p0.s, p0/z, z0.s, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmple p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmplo p0.b, p0/z, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmplo p0.b, p0/z, z0.b, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmplo p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmplo p0.d, p0/z, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmplo p0.d, p0/z, z0.d, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmplo p0.h, p0/z, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmplo p0.h, p0/z, z0.h, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmplo p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmplo p0.s, p0/z, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmplo p0.s, p0/z, z0.s, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmplo p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpls p0.b, p0/z, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpls p0.b, p0/z, z0.b, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpls p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpls p0.d, p0/z, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpls p0.d, p0/z, z0.d, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpls p0.h, p0/z, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpls p0.h, p0/z, z0.h, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpls p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpls p0.s, p0/z, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpls p0.s, p0/z, z0.s, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpls p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmplt p0.b, p0/z, z0.b, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmplt p0.b, p0/z, z0.b, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmplt p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmplt p0.d, p0/z, z0.d, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmplt p0.d, p0/z, z0.d, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmplt p0.h, p0/z, z0.h, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmplt p0.h, p0/z, z0.h, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmplt p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmplt p0.s, p0/z, z0.s, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmplt p0.s, p0/z, z0.s, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmplt p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpne p0.b, p0/z, z0.b, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpne p0.b, p0/z, z0.b, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpne p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpne p0.b, p0/z, z0.b, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpne p0.d, p0/z, z0.d, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpne p0.d, p0/z, z0.d, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpne p0.d, p0/z, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpne p0.h, p0/z, z0.h, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpne p0.h, p0/z, z0.h, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpne p0.h, p0/z, z0.h, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpne p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpne p0.s, p0/z, z0.s, #-16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpne p0.s, p0/z, z0.s, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpne p0.s, p0/z, z0.s, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - cmpne p0.s, p0/z, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cnot z31.b, p7/m, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cnot z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cnot z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cnot z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cnt z31.b, p7/m, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cnt z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cnt z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 cnt z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - cntb x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - cntb x0, #28
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - cntb x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - cntb x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - cntd x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - cntd x0, #28
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - cntd x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - cntd x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - cnth x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - cnth x0, #28
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - cnth x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - cnth x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - cntp x0, p15, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - cntp x0, p15, p0.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - cntp x0, p15, p0.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - cntp x0, p15, p0.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - cntw x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - cntw x0, #28
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - cntw x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - cntw x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 compact z31.d, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 compact z31.s, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - ctermeq w30, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - ctermeq wzr, w30
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - ctermeq x30, xzr
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - ctermeq xzr, x30
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - ctermne w30, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - ctermne wzr, w30
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - ctermne x30, xzr
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - ctermne xzr, x30
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - decb x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - decb x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - decb x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - decb x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - decb x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - decd x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - decd x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - decd x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - decd x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - decd x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - dech x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - dech x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - dech x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - dech x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - dech x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - decp x0, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - decp x0, p0.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - decp x0, p0.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - decp x0, p0.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - decp xzr, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - decp xzr, p15.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - decp xzr, p15.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - decp xzr, p15.s
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - 0.50 0.50 decp z31.d, p15.d
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - 0.50 0.50 decp z31.h, p15.h
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - 0.50 0.50 decp z31.s, p15.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - decw x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - decw x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - decw x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - decw x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - decw x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 dupm z0.d, #0xfffffffffffffff9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 dupm z0.s, #0xfffffff9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 dupm z23.h, #0xfff9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 dupm z5.b, #0xf9
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - eor p0.b, p0/z, p0.b, p1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 eor z0.d, z0.d, #0x6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 eor z0.d, z0.d, #0xfffffffffffffff9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 eor z0.d, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 eor z0.s, z0.s, #0x6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 eor z0.s, z0.s, #0xfffffff9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 eor z23.d, z13.d, z8.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 eor z23.h, z23.h, #0x6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 eor z23.h, z23.h, #0xfff9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 eor z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 eor z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 eor z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 eor z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 eor z5.b, z5.b, #0x6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 eor z5.b, z5.b, #0xf9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 eor3 z29.d, z29.d, z30.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 eorbt z0.b, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 eorbt z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 eorbt z0.h, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 eorbt z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - eors p0.b, p0/z, p0.b, p1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 eortb z0.b, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 eortb z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 eortb z0.h, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 eortb z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 1.50 eorv b0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 1.50 eorv d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 1.50 eorv h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 1.50 eorv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ext z0.b, { z1.b, z2.b }, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ext z31.b, z31.b, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ext z31.b, z31.b, z0.b, #255
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ext z31.b, { z30.b, z31.b }, #255
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fabd z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fabd z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fabd z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fabs z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fabs z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fabs z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - facge p0.d, p0/z, z0.d, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - facge p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - facge p0.h, p0/z, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - facge p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - facge p0.s, p0/z, z0.s, z1.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - facge p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - facgt p0.d, p0/z, z0.d, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - facgt p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - facgt p0.h, p0/z, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - facgt p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - facgt p0.s, p0/z, z0.s, z1.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - facgt p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fadd z0.d, p0/m, z0.d, #0.5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fadd z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fadd z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fadd z0.h, p0/m, z0.h, #0.5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fadd z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fadd z0.h, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fadd z0.s, p0/m, z0.s, #0.5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fadd z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fadd z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fadd z31.d, p7/m, z31.d, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fadd z31.h, p7/m, z31.h, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fadd z31.s, p7/m, z31.s, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fadda d0, p7, d0, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 9.00 fadda h0, p7, h0, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 5.00 fadda s0, p7, s0, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 faddp z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 faddp z29.s, p3/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 faddp z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 faddv d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 2.00 faddv h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.50 1.50 faddv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcadd z0.d, p0/m, z0.d, z0.d, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcadd z0.h, p0/m, z0.h, z0.h, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcadd z0.s, p0/m, z0.s, z0.s, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcadd z31.d, p7/m, z31.d, z31.d, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcadd z31.h, p7/m, z31.h, z31.h, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcadd z31.s, p7/m, z31.s, z31.s, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmeq p0.d, p0/z, z0.d, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmeq p0.d, p0/z, z0.d, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmeq p0.h, p0/z, z0.h, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmeq p0.h, p0/z, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmeq p0.s, p0/z, z0.s, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmeq p0.s, p0/z, z0.s, z1.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmge p0.d, p0/z, z0.d, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmge p0.d, p0/z, z0.d, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmge p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmge p0.h, p0/z, z0.h, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmge p0.h, p0/z, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmge p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmge p0.s, p0/z, z0.s, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmge p0.s, p0/z, z0.s, z1.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmge p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmgt p0.d, p0/z, z0.d, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmgt p0.d, p0/z, z0.d, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmgt p0.d, p0/z, z1.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmgt p0.h, p0/z, z0.h, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmgt p0.h, p0/z, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmgt p0.h, p0/z, z1.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmgt p0.s, p0/z, z0.s, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmgt p0.s, p0/z, z0.s, z1.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmgt p0.s, p0/z, z1.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcmla z0.d, p0/m, z0.d, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcmla z0.d, p0/m, z1.d, z2.d, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcmla z0.h, p0/m, z0.h, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcmla z0.h, p0/m, z1.h, z2.h, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcmla z0.h, z0.h, z0.h[0], #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcmla z0.s, p0/m, z0.s, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcmla z0.s, p0/m, z1.s, z2.s, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcmla z21.s, z10.s, z5.s[1], #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcmla z23.s, z13.s, z8.s[0], #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcmla z29.d, p7/m, z30.d, z31.d, #180
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcmla z29.h, p7/m, z30.h, z31.h, #180
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcmla z29.s, p7/m, z30.s, z31.s, #180
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcmla z31.d, p7/m, z31.d, z31.d, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcmla z31.h, p7/m, z31.h, z31.h, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcmla z31.h, z31.h, z7.h[3], #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fcmla z31.s, p7/m, z31.s, z31.s, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmle p0.d, p0/z, z0.d, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmle p0.h, p0/z, z0.h, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmle p0.s, p0/z, z0.s, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmlt p0.d, p0/z, z0.d, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmlt p0.h, p0/z, z0.h, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmlt p0.s, p0/z, z0.s, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmne p0.d, p0/z, z0.d, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmne p0.d, p0/z, z0.d, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmne p0.h, p0/z, z0.h, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmne p0.h, p0/z, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmne p0.s, p0/z, z0.s, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmne p0.s, p0/z, z0.s, z1.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmuo p0.d, p0/z, z0.d, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmuo p0.h, p0/z, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcmuo p0.s, p0/z, z0.s, z1.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvt z0.d, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvt z0.d, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvt z0.h, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvt z0.h, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvt z0.s, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvt z0.s, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtlt z0.s, p0/m, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtlt z30.d, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtnt z0.h, p0/m, z1.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtnt z30.s, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtx z0.s, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtx z30.s, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtxnt z0.s, p0/m, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtxnt z30.s, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzs z0.d, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzs z0.d, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzs z0.d, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - fcvtzs z0.h, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzs z0.s, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtzs z0.s, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtzs z0.s, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzu z0.d, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzu z0.d, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzu z0.d, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - fcvtzu z0.h, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - fcvtzu z0.s, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtzu z0.s, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - fcvtzu z0.s, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 fdiv z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 8.00 fdiv z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 4.00 fdiv z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 fdivr z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 8.00 fdivr z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 4.00 fdivr z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 fexpa z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 fexpa z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 fexpa z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - flogb z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - flogb z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - flogb z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmad z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmad z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmad z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmax z0.d, p0/m, z0.d, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmax z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmax z0.h, p0/m, z0.h, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmax z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmax z0.s, p0/m, z0.s, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmax z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmax z31.d, p7/m, z31.d, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmax z31.h, p7/m, z31.h, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmax z31.s, p7/m, z31.s, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmaxnm z0.d, p0/m, z0.d, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmaxnm z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmaxnm z0.h, p0/m, z0.h, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmaxnm z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmaxnm z0.s, p0/m, z0.s, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmaxnm z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmaxnm z31.d, p7/m, z31.d, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmaxnm z31.h, p7/m, z31.h, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmaxnm z31.s, p7/m, z31.s, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmaxnmp z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmaxnmp z29.s, p3/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmaxnmp z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 fmaxnmv d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 2.00 fmaxnmv h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.50 1.50 fmaxnmv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmaxp z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmaxp z29.s, p3/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmaxp z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 fmaxv d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 2.00 fmaxv h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.50 1.50 fmaxv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmin z0.d, p0/m, z0.d, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmin z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmin z0.h, p0/m, z0.h, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmin z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmin z0.s, p0/m, z0.s, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmin z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmin z31.d, p7/m, z31.d, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmin z31.h, p7/m, z31.h, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmin z31.s, p7/m, z31.s, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fminnm z0.d, p0/m, z0.d, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fminnm z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fminnm z0.h, p0/m, z0.h, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fminnm z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fminnm z0.s, p0/m, z0.s, #0.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fminnm z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fminnm z31.d, p7/m, z31.d, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fminnm z31.h, p7/m, z31.h, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fminnm z31.s, p7/m, z31.s, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fminnmp z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fminnmp z29.s, p3/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fminnmp z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 fminnmv d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 2.00 fminnmv h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.50 1.50 fminnmv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fminp z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fminp z29.s, p3/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fminp z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 fminv d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 2.00 fminv h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.50 1.50 fminv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmla z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmla z0.d, z1.d, z7.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmla z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmla z0.h, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmla z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmla z0.s, z1.s, z7.s[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmlalb z0.s, z1.h, z7.h[0]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmlalb z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmlalb z30.s, z31.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmlalt z0.s, z1.h, z7.h[0]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmlalt z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmlalt z30.s, z31.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmls z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmls z0.d, z1.d, z7.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmls z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmls z0.h, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmls z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmls z0.s, z1.s, z7.s[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmlslb z0.s, z1.h, z7.h[0]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmlslb z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmlslb z30.s, z31.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmlslt z0.s, z1.h, z7.h[0]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmlslt z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmlslt z30.s, z31.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmov z0.d, #-10.00000000
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmov z0.d, #0.12500000
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmov z0.d, p0/m, #-10.00000000
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmov z0.d, p0/m, #0.12500000
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmov z0.h, #-0.12500000
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmov z0.h, p0/m, #-0.12500000
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmov z0.s, #-0.12500000
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmov z0.s, p0/m, #-0.12500000
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmsb z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmsb z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmsb z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmul z0.d, p0/m, z0.d, #0.5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmul z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmul z0.d, z0.d, z0.d[0]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmul z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmul z0.h, p0/m, z0.h, #0.5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmul z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmul z0.h, z0.h, z0.h[0]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmul z0.h, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmul z0.s, p0/m, z0.s, #0.5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmul z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmul z0.s, z0.s, z0.s[0]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmul z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmul z31.d, p7/m, z31.d, #2.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmul z31.d, z31.d, z15.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmul z31.h, p7/m, z31.h, #2.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmul z31.h, z31.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmul z31.s, p7/m, z31.s, #2.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmul z31.s, z31.s, z7.s[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmulx z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmulx z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fmulx z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fneg z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fneg z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fneg z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fnmad z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fnmad z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fnmad z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fnmla z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fnmla z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fnmla z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fnmls z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fnmls z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fnmls z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fnmsb z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fnmsb z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fnmsb z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frecpe z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - frecpe z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - frecpe z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 frecps z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 frecps z0.h, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 frecps z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frecpx z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - frecpx z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - frecpx z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frinta z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - frinta z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - frinta z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frinti z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - frinti z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - frinti z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frintm z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - frintm z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - frintm z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frintn z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - frintn z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - frintn z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frintp z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - frintp z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - frintp z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frintx z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - frintx z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - frintx z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frintz z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - frintz z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - frintz z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - frsqrte z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - frsqrte z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - frsqrte z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 frsqrts z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 frsqrts z0.h, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 frsqrts z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fscale z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fscale z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fscale z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 2.00 fsqrt z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 8.00 fsqrt z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 4.00 fsqrt z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fsub z0.d, p0/m, z0.d, #0.5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fsub z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fsub z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fsub z0.h, p0/m, z0.h, #0.5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fsub z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fsub z0.h, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fsub z0.s, p0/m, z0.s, #0.5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fsub z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fsub z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fsub z31.d, p7/m, z31.d, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fsub z31.h, p7/m, z31.h, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fsub z31.s, p7/m, z31.s, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fsubr z0.d, p0/m, z0.d, #0.5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fsubr z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fsubr z0.h, p0/m, z0.h, #0.5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fsubr z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fsubr z0.s, p0/m, z0.s, #0.5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fsubr z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fsubr z31.d, p7/m, z31.d, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fsubr z31.h, p7/m, z31.h, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 fsubr z31.s, p7/m, z31.s, #1.0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ftmad z0.d, z0.d, z31.d, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ftmad z0.h, z0.h, z31.h, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ftmad z0.s, z0.s, z31.s, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ftsmul z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ftsmul z0.h, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ftsmul z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ftssel z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ftssel z0.h, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ftssel z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 histcnt z0.s, p0/z, z1.s, z2.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 histcnt z29.d, p7/z, z30.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 histseg z0.b, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - incb x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - incb x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - incb x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - incb x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - incb x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - incd x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - incd x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - incd x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - incd x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - incd x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 incd z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 incd z0.d, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - inch x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - inch x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - inch x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - inch x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - inch x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 inch z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 inch z0.h, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - incp x0, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - incp x0, p0.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - incp x0, p0.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - incp x0, p0.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - incp xzr, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - incp xzr, p15.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - incp xzr, p15.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - incp xzr, p15.s
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - 0.50 0.50 incp z31.d, p15.d
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - 0.50 0.50 incp z31.h, p15.h
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - 0.50 0.50 incp z31.s, p15.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - incw x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - incw x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - incw x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - incw x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - incw x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 incw z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 incw z0.s, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - index z0.b, #0, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - index z0.d, #0, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - index z0.h, #0, #0
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 1.00 - index z0.h, w0, w0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - index z0.s, #0, #0
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 1.00 - index z21.b, w10, w21
+# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - - - - - - 2.00 - index z21.d, x10, x21
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 1.00 - index z21.s, w10, w21
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 1.00 - index z23.b, #13, w8
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 1.00 - index z23.b, w13, #8
+# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - - - - - - 2.00 - index z23.d, #13, x8
+# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - - - - - - 2.00 - index z23.d, x13, #8
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 1.00 - index z23.h, #13, w8
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 1.00 - index z23.h, w13, #8
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 1.00 - index z23.s, #13, w8
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 1.00 - index z23.s, w13, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - index z31.b, #-1, #-1
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 1.00 - index z31.b, #-1, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 1.00 - index z31.b, wzr, #-1
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 1.00 - index z31.b, wzr, wzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - index z31.d, #-1, #-1
+# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - - - - - - 2.00 - index z31.d, #-1, xzr
+# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - - - - - - 2.00 - index z31.d, xzr, #-1
+# CHECK-NEXT: - - - - - - - - - - - - 2.00 - - - - - - - - 2.00 - index z31.d, xzr, xzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - index z31.h, #-1, #-1
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 1.00 - index z31.h, #-1, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 1.00 - index z31.h, wzr, #-1
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 1.00 - index z31.h, wzr, wzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - index z31.s, #-1, #-1
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 1.00 - index z31.s, #-1, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 1.00 - index z31.s, wzr, #-1
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 1.00 - index z31.s, wzr, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - 1.00 insr z0.b, w0
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - 1.00 insr z0.d, x0
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - 1.00 insr z0.h, w0
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - 1.00 insr z0.s, w0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 insr z31.b, b31
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - 1.00 insr z31.b, wzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 insr z31.d, d31
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - 1.00 insr z31.d, xzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 insr z31.h, h31
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - 1.00 insr z31.h, wzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 insr z31.s, s31
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - 1.00 insr z31.s, wzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lasta b0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lasta d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lasta h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lasta s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - 1.00 lasta w0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - 1.00 lasta w0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - 1.00 lasta w0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - 1.00 lasta x0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lastb b0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lastb d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lastb h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lastb s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - 1.00 lastb w0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - 1.00 lastb w0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - 1.00 lastb w0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - 1.00 lastb x0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1b { z0.b }, p0/z, [sp, x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1b { z0.b }, p0/z, [x0, x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1b { z0.b }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1b { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1b { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1b { z0.h }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1b { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1b { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1b { z0.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1b { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1b { z21.b }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1b { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ld1b { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ld1b { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1b { z21.h }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1b { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1b { z21.s }, p5/z, [x10, x21]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1b { z23.d }, p3/z, [x13, x8]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1b { z31.b }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1b { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ld1b { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1b { z31.d }, p7/z, [z31.d, #31]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1b { z31.h }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1b { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1b { z31.s }, p7/z, [z31.s, #31]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1b { z5.h }, p3/z, [x17, x16]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1d { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1d { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1d { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ld1d { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ld1d { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1d { z23.d }, p3/z, [sp, x8, lsl #3]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1d { z23.d }, p3/z, [x13, x8, lsl #3]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1d { z23.d }, p3/z, [x13, z8.d, lsl #3]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1d { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ld1d { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1d { z31.d }, p7/z, [z31.d, #248]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1h { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1h { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1h { z0.h }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1h { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1h { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1h { z0.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1h { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1h { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ld1h { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ld1h { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1h { z21.h }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1h { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1h { z21.s }, p5/z, [x10, x21, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1h { z23.d }, p3/z, [x13, x8, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1h { z23.d }, p3/z, [x13, z8.d, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1h { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ld1h { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1h { z31.d }, p7/z, [z31.d, #62]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1h { z31.h }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1h { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 4.00 4.00 ld1h { z31.s }, p7/z, [sp, z31.s, sxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 4.00 4.00 ld1h { z31.s }, p7/z, [sp, z31.s, uxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1h { z31.s }, p7/z, [z31.s, #62]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1h { z5.h }, p3/z, [sp, x16, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1h { z5.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rb { z0.b }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rb { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rb { z0.h }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rb { z0.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rb { z31.b }, p7/z, [sp, #63]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rb { z31.d }, p7/z, [sp, #63]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rb { z31.h }, p7/z, [sp, #63]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rb { z31.s }, p7/z, [sp, #63]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rd { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rd { z31.d }, p7/z, [sp, #504]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rh { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rh { z0.h }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rh { z0.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rh { z31.d }, p7/z, [sp, #126]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rh { z31.h }, p7/z, [sp, #126]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rh { z31.s }, p7/z, [sp, #126]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rqb { z0.b }, p0/z, [x0, x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rqb { z0.b }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rqb { z21.b }, p5/z, [x10, #112]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rqb { z23.b }, p3/z, [x13, #-128]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rqb { z31.b }, p7/z, [sp, #-16]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rqd { z0.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rqd { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rqd { z23.d }, p3/z, [x13, #-128]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rqd { z23.d }, p3/z, [x13, #112]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rqd { z31.d }, p7/z, [sp, #-16]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rqh { z0.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rqh { z0.h }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rqh { z23.h }, p3/z, [x13, #-128]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rqh { z23.h }, p3/z, [x13, #112]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rqh { z31.h }, p7/z, [sp, #-16]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rqw { z0.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rqw { z0.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rqw { z23.s }, p3/z, [x13, #-128]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rqw { z23.s }, p3/z, [x13, #112]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rqw { z31.s }, p7/z, [sp, #-16]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rsb { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rsb { z0.h }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rsb { z0.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rsb { z31.d }, p7/z, [sp, #63]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rsb { z31.h }, p7/z, [sp, #63]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rsb { z31.s }, p7/z, [sp, #63]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rsh { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rsh { z0.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rsh { z31.d }, p7/z, [sp, #126]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rsh { z31.s }, p7/z, [sp, #126]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rsw { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rsw { z31.d }, p7/z, [sp, #252]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rw { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rw { z0.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rw { z31.d }, p7/z, [sp, #252]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1rw { z31.s }, p7/z, [sp, #252]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1sb { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1sb { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1sb { z0.h }, p0/z, [sp, x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1sb { z0.h }, p0/z, [x0, x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1sb { z0.h }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1sb { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1sb { z0.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1sb { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1sb { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ld1sb { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ld1sb { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1sb { z21.h }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1sb { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1sb { z21.s }, p5/z, [x10, x21]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1sb { z23.d }, p3/z, [x13, x8]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1sb { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ld1sb { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1sb { z31.d }, p7/z, [z31.d, #31]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1sb { z31.h }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1sb { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1sb { z31.s }, p7/z, [z31.s, #31]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1sh { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1sh { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1sh { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1sh { z0.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1sh { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1sh { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ld1sh { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ld1sh { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1sh { z21.s }, p5/z, [sp, x21, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1sh { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1sh { z21.s }, p5/z, [x10, x21, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1sh { z23.d }, p3/z, [x13, x8, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1sh { z23.d }, p3/z, [x13, z8.d, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1sh { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ld1sh { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1sh { z31.d }, p7/z, [z31.d, #62]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1sh { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 4.00 4.00 ld1sh { z31.s }, p7/z, [sp, z31.s, sxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 4.00 4.00 ld1sh { z31.s }, p7/z, [sp, z31.s, uxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1sh { z31.s }, p7/z, [z31.s, #62]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1sw { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1sw { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1sw { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ld1sw { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ld1sw { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1sw { z23.d }, p3/z, [sp, x8, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1sw { z23.d }, p3/z, [x13, x8, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1sw { z23.d }, p3/z, [x13, z8.d, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1sw { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ld1sw { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1sw { z31.d }, p7/z, [z31.d, #124]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1w { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1w { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1w { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1w { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1w { z0.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1w { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1w { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ld1w { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ld1w { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1w { z21.s }, p5/z, [sp, x21, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1w { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1w { z21.s }, p5/z, [x10, x21, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1w { z23.d }, p3/z, [x13, x8, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1w { z23.d }, p3/z, [x13, z8.d, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1w { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ld1w { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1w { z31.d }, p7/z, [z31.d, #124]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ld1w { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 4.00 4.00 ld1w { z31.s }, p7/z, [sp, z31.s, sxtw #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 4.00 4.00 ld1w { z31.s }, p7/z, [sp, z31.s, uxtw #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ld1w { z31.s }, p7/z, [z31.s, #124]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - 1.00 1.00 ld2b { z0.b, z1.b }, p0/z, [x0, x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 ld2b { z0.b, z1.b }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 ld2b { z21.b, z22.b }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 ld2b { z23.b, z24.b }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - 1.00 1.00 ld2b { z5.b, z6.b }, p3/z, [x17, x16]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - 1.00 1.00 ld2d { z0.d, z1.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 ld2d { z0.d, z1.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 ld2d { z21.d, z22.d }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 ld2d { z23.d, z24.d }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - 1.00 1.00 ld2d { z5.d, z6.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - 1.00 1.00 ld2h { z0.h, z1.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 ld2h { z0.h, z1.h }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 ld2h { z21.h, z22.h }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 ld2h { z23.h, z24.h }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - 1.00 1.00 ld2h { z5.h, z6.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - 1.00 1.00 ld2w { z0.s, z1.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 ld2w { z0.s, z1.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 ld2w { z21.s, z22.s }, p5/z, [x10, #10, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 ld2w { z23.s, z24.s }, p3/z, [x13, #-16, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 - 1.00 1.00 ld2w { z5.s, z6.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.38 0.38 0.38 0.38 0.38 0.38 0.38 0.38 - 1.50 1.50 ld3b { z0.b - z2.b }, p0/z, [x0, x0]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 - - - - - - - - - 1.50 1.50 ld3b { z0.b - z2.b }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 - - - - - - - - - 1.50 1.50 ld3b { z21.b - z23.b }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 - - - - - - - - - 1.50 1.50 ld3b { z23.b - z25.b }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.38 0.38 0.38 0.38 0.38 0.38 0.38 0.38 - 1.50 1.50 ld3b { z5.b - z7.b }, p3/z, [x17, x16]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.38 0.38 0.38 0.38 0.38 0.38 0.38 0.38 - 1.50 1.50 ld3d { z0.d - z2.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 - - - - - - - - - 1.50 1.50 ld3d { z0.d - z2.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 - - - - - - - - - 1.50 1.50 ld3d { z21.d - z23.d }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 - - - - - - - - - 1.50 1.50 ld3d { z23.d - z25.d }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.38 0.38 0.38 0.38 0.38 0.38 0.38 0.38 - 1.50 1.50 ld3d { z5.d - z7.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.38 0.38 0.38 0.38 0.38 0.38 0.38 0.38 - 1.50 1.50 ld3h { z0.h - z2.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 - - - - - - - - - 1.50 1.50 ld3h { z0.h - z2.h }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 - - - - - - - - - 1.50 1.50 ld3h { z21.h - z23.h }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 - - - - - - - - - 1.50 1.50 ld3h { z23.h - z25.h }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.38 0.38 0.38 0.38 0.38 0.38 0.38 0.38 - 1.50 1.50 ld3h { z5.h - z7.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.38 0.38 0.38 0.38 0.38 0.38 0.38 0.38 - 1.50 1.50 ld3w { z0.s - z2.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 - - - - - - - - - 1.50 1.50 ld3w { z0.s - z2.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 - - - - - - - - - 1.50 1.50 ld3w { z21.s - z23.s }, p5/z, [x10, #15, mul vl]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 - - - - - - - - - 1.50 1.50 ld3w { z23.s - z25.s }, p3/z, [x13, #-24, mul vl]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 1.00 0.38 0.38 0.38 0.38 0.38 0.38 0.38 0.38 - 1.50 1.50 ld3w { z5.s - z7.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 - 4.00 4.00 ld4b { z0.b - z3.b }, p0/z, [x0, x0]
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 - - - - - - - - - 4.00 4.00 ld4b { z0.b - z3.b }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 - - - - - - - - - 4.00 4.00 ld4b { z21.b - z24.b }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 - - - - - - - - - 4.00 4.00 ld4b { z23.b - z26.b }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 - 4.00 4.00 ld4b { z5.b - z8.b }, p3/z, [x17, x16]
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 - 4.00 4.00 ld4d { z0.d - z3.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 - - - - - - - - - 4.00 4.00 ld4d { z0.d - z3.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 - - - - - - - - - 4.00 4.00 ld4d { z21.d - z24.d }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 - - - - - - - - - 4.00 4.00 ld4d { z23.d - z26.d }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 - 4.00 4.00 ld4d { z5.d - z8.d }, p3/z, [x17, x16, lsl #3]
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 - 4.00 4.00 ld4h { z0.h - z3.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 - - - - - - - - - 4.00 4.00 ld4h { z0.h - z3.h }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 - - - - - - - - - 4.00 4.00 ld4h { z21.h - z24.h }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 - - - - - - - - - 4.00 4.00 ld4h { z23.h - z26.h }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 - 4.00 4.00 ld4h { z5.h - z8.h }, p3/z, [x17, x16, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 - 4.00 4.00 ld4w { z0.s - z3.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 - - - - - - - - - 4.00 4.00 ld4w { z0.s - z3.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 - - - - - - - - - 4.00 4.00 ld4w { z21.s - z24.s }, p5/z, [x10, #20, mul vl]
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 - - - - - - - - - 4.00 4.00 ld4w { z23.s - z26.s }, p3/z, [x13, #-32, mul vl]
+# CHECK-NEXT: - - - - - - - - - 1.33 1.33 1.33 0.50 0.50 0.50 0.50 0.50 0.50 0.50 0.50 - 4.00 4.00 ld4w { z5.s - z8.s }, p3/z, [x17, x16, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldff1b { z0.d }, p0/z, [x0, x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1b { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldff1b { z0.h }, p0/z, [x0, x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldff1b { z0.s }, p0/z, [x0, x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1b { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1b { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1b { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ldff1b { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ldff1b { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldff1b { z31.b }, p7/z, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ldff1b { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldff1b { z31.d }, p7/z, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1b { z31.d }, p7/z, [z31.d, #31]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldff1b { z31.h }, p7/z, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldff1b { z31.s }, p7/z, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1b { z31.s }, p7/z, [z31.s, #31]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldff1d { z0.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1d { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ldff1d { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ldff1d { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1d { z23.d }, p3/z, [x13, z8.d, lsl #3]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ldff1d { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldff1d { z31.d }, p7/z, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1d { z31.d }, p7/z, [z31.d, #248]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldff1h { z0.d }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1h { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldff1h { z0.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldff1h { z0.s }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1h { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1h { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1h { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ldff1h { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ldff1h { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1h { z23.d }, p3/z, [x13, z8.d, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ldff1h { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldff1h { z31.d }, p7/z, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1h { z31.d }, p7/z, [z31.d, #62]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldff1h { z31.h }, p7/z, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 4.00 4.00 ldff1h { z31.s }, p7/z, [sp, z31.s, sxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 4.00 4.00 ldff1h { z31.s }, p7/z, [sp, z31.s, uxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldff1h { z31.s }, p7/z, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1h { z31.s }, p7/z, [z31.s, #62]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldff1sb { z0.d }, p0/z, [x0, x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1sb { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldff1sb { z0.h }, p0/z, [x0, x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldff1sb { z0.s }, p0/z, [x0, x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1sb { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1sb { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1sb { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ldff1sb { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ldff1sb { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ldff1sb { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldff1sb { z31.d }, p7/z, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1sb { z31.d }, p7/z, [z31.d, #31]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldff1sb { z31.h }, p7/z, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldff1sb { z31.s }, p7/z, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1sb { z31.s }, p7/z, [z31.s, #31]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldff1sh { z0.d }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1sh { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldff1sh { z0.s }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1sh { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1sh { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ldff1sh { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ldff1sh { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1sh { z23.d }, p3/z, [x13, z8.d, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ldff1sh { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldff1sh { z31.d }, p7/z, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1sh { z31.d }, p7/z, [z31.d, #62]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 4.00 4.00 ldff1sh { z31.s }, p7/z, [sp, z31.s, sxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 4.00 4.00 ldff1sh { z31.s }, p7/z, [sp, z31.s, uxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldff1sh { z31.s }, p7/z, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1sh { z31.s }, p7/z, [z31.s, #62]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldff1sw { z0.d }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1sw { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ldff1sw { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ldff1sw { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1sw { z23.d }, p3/z, [x13, z8.d, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ldff1sw { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldff1sw { z31.d }, p7/z, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1sw { z31.d }, p7/z, [z31.d, #124]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldff1w { z0.d }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1w { z0.d }, p0/z, [z0.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldff1w { z0.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1w { z0.s }, p0/z, [x0, z0.s, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1w { z0.s }, p0/z, [x0, z0.s, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1w { z0.s }, p0/z, [z0.s]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ldff1w { z21.d }, p5/z, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ldff1w { z21.d }, p5/z, [x10, z21.d, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1w { z23.d }, p3/z, [x13, z8.d, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 1.00 1.00 ldff1w { z31.d }, p7/z, [sp, z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldff1w { z31.d }, p7/z, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1w { z31.d }, p7/z, [z31.d, #124]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 4.00 4.00 ldff1w { z31.s }, p7/z, [sp, z31.s, sxtw #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 4.00 4.00 ldff1w { z31.s }, p7/z, [sp, z31.s, uxtw #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 - - - ldff1w { z31.s }, p7/z, [sp]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - 2.00 2.00 ldff1w { z31.s }, p7/z, [z31.s, #124]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1b { z0.b }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1b { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1b { z0.h }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1b { z0.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1b { z21.b }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1b { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1b { z21.h }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1b { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1b { z31.b }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1b { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1b { z31.h }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1b { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1d { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1d { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1d { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1h { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1h { z0.h }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1h { z0.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1h { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1h { z21.h }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1h { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1h { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1h { z31.h }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1h { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1sb { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1sb { z0.h }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1sb { z0.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1sb { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1sb { z21.h }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1sb { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1sb { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1sb { z31.h }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1sb { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1sh { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1sh { z0.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1sh { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1sh { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1sh { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1sh { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1sw { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1sw { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1sw { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1w { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1w { z0.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1w { z21.d }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1w { z21.s }, p5/z, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1w { z31.d }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnf1w { z31.s }, p7/z, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnt1b { z0.b }, p0/z, [x0, x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnt1b { z0.b }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 ldnt1b { z0.d }, p0/z, [z1.d]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 2.00 2.00 ldnt1b { z0.s }, p0/z, [z1.s]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnt1b { z21.b }, p5/z, [x10, #7, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnt1b { z23.b }, p3/z, [x13, #-8, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 ldnt1b { z31.d }, p7/z, [z31.d, x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 ldnt1b { z31.d }, p7/z, [z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 2.00 2.00 ldnt1b { z31.s }, p7/z, [z31.s, x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 2.00 2.00 ldnt1b { z31.s }, p7/z, [z31.s]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnt1d { z0.d }, p0/z, [x0, x0, lsl #3]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnt1d { z0.d }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 ldnt1d { z0.d }, p0/z, [z1.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnt1d { z21.d }, p5/z, [x10, #7, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnt1d { z23.d }, p3/z, [x13, #-8, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 ldnt1d { z31.d }, p7/z, [z31.d, x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 ldnt1d { z31.d }, p7/z, [z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 ldnt1h { z0.d }, p0/z, [z1.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnt1h { z0.h }, p0/z, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnt1h { z0.h }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 2.00 2.00 ldnt1h { z0.s }, p0/z, [z1.s]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnt1h { z21.h }, p5/z, [x10, #7, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnt1h { z23.h }, p3/z, [x13, #-8, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 ldnt1h { z31.d }, p7/z, [z31.d, x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 ldnt1h { z31.d }, p7/z, [z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 2.00 2.00 ldnt1h { z31.s }, p7/z, [z31.s, x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 2.00 2.00 ldnt1h { z31.s }, p7/z, [z31.s]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 ldnt1sb { z0.d }, p0/z, [z1.d]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 2.00 2.00 ldnt1sb { z0.s }, p0/z, [z1.s]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 ldnt1sb { z31.d }, p7/z, [z31.d, x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 ldnt1sb { z31.d }, p7/z, [z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 2.00 2.00 ldnt1sb { z31.s }, p7/z, [z31.s, x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 2.00 2.00 ldnt1sb { z31.s }, p7/z, [z31.s]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 ldnt1sh { z0.d }, p0/z, [z1.d]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 2.00 2.00 ldnt1sh { z0.s }, p0/z, [z1.s]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 ldnt1sh { z31.d }, p7/z, [z31.d, x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 ldnt1sh { z31.d }, p7/z, [z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 2.00 2.00 ldnt1sh { z31.s }, p7/z, [z31.s, x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 2.00 2.00 ldnt1sh { z31.s }, p7/z, [z31.s]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 ldnt1sw { z0.d }, p0/z, [z1.d]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 ldnt1sw { z31.d }, p7/z, [z31.d, x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 ldnt1sw { z31.d }, p7/z, [z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 ldnt1w { z0.d }, p0/z, [z1.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnt1w { z0.s }, p0/z, [x0, x0, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnt1w { z0.s }, p0/z, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 2.00 2.00 ldnt1w { z0.s }, p0/z, [z1.s]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnt1w { z21.s }, p5/z, [x10, #7, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldnt1w { z23.s }, p3/z, [x13, #-8, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 ldnt1w { z31.d }, p7/z, [z31.d, x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 1.00 1.00 ldnt1w { z31.d }, p7/z, [z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 2.00 2.00 ldnt1w { z31.s }, p7/z, [z31.s, x0]
+# CHECK-NEXT: - - - - - - - - - 0.67 0.67 0.67 - - - - - - - - - 2.00 2.00 ldnt1w { z31.s }, p7/z, [z31.s]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.50 0.50 - - - - - - - - - ldr p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.50 0.50 - - - - - - - - - ldr p5, [x10, #255, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 0.50 0.50 - - - - - - - - - ldr p7, [x13, #-256, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldr z0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldr z23, [x13, #255, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - ldr z31, [sp, #-256, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsl z0.b, p0/m, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsl z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsl z0.b, p0/m, z0.b, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsl z0.b, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsl z0.b, z1.b, z2.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsl z0.d, p0/m, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsl z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsl z0.d, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsl z0.h, p0/m, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsl z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsl z0.h, p0/m, z0.h, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsl z0.h, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsl z0.h, z1.h, z2.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsl z0.s, p0/m, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsl z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsl z0.s, p0/m, z0.s, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsl z0.s, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsl z0.s, z1.s, z2.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsl z31.b, p0/m, z31.b, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsl z31.b, z31.b, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsl z31.d, p0/m, z31.d, #63
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsl z31.d, z31.d, #63
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsl z31.h, p0/m, z31.h, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsl z31.h, z31.h, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsl z31.s, p0/m, z31.s, #31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsl z31.s, z31.s, #31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lslr z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lslr z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lslr z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lslr z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsr z0.b, p0/m, z0.b, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsr z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsr z0.b, p0/m, z0.b, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsr z0.b, z0.b, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsr z0.b, z1.b, z2.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsr z0.d, p0/m, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsr z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsr z0.d, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsr z0.h, p0/m, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsr z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsr z0.h, p0/m, z0.h, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsr z0.h, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsr z0.h, z1.h, z2.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsr z0.s, p0/m, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsr z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsr z0.s, p0/m, z0.s, z1.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsr z0.s, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsr z0.s, z1.s, z2.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsr z31.b, p0/m, z31.b, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsr z31.b, z31.b, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsr z31.d, p0/m, z31.d, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsr z31.d, z31.d, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsr z31.h, p0/m, z31.h, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsr z31.h, z31.h, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsr z31.s, p0/m, z31.s, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsr z31.s, z31.s, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsrr z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsrr z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsrr z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 lsrr z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - mad z0.b, p7/m, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - mad z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - mad z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - mad z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - 1.00 - match p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - 1.00 - match p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - 1.00 - match p15.b, p7/z, z30.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - 1.00 - match p15.h, p7/z, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - mla z0.b, p7/m, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - mla z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - mla z0.d, z1.d, z7.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - mla z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - mla z0.h, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - mla z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - mla z0.s, z1.s, z7.s[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - mls z0.b, p7/m, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - mls z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - mls z0.d, z1.d, z7.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - mls z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - mls z0.h, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - mls z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - mls z0.s, z1.s, z7.s[3]
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - mov p0.b, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - mov p0.b, p0/m, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - mov p0.b, p0/z, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - mov p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - mov p15.b, p15/m, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - mov p15.b, p15/z, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z0.b, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z0.b, b0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z0.b, p0/m, b0
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 0.50 mov z0.b, p0/m, w0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z0.b, p0/z, #127
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - mov z0.b, w0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z0.d, #0xe0000000000003ff
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z0.d, #0xffffffffffff7fff
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z0.d, #32768
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z0.d, d0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z0.d, p0/m, d0
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 0.50 mov z0.d, p0/m, x0
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - mov z0.d, x0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z0.h, #-256
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z0.h, #-32768
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z0.h, #32512
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z0.h, #32767
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z0.h, h0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z0.h, p0/m, h0
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 0.50 mov z0.h, p0/m, w0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z0.h, p0/z, #32512
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - mov z0.h, w0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z0.q, q0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z0.s, #0xffff7fff
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z0.s, #32768
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z0.s, p0/m, s0
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 0.50 mov z0.s, p0/m, w0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z0.s, s0
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - mov z0.s, w0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z21.d, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z21.d, #-32768
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z21.d, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z21.d, #32512
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z21.d, p0/z, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z21.d, p0/z, #-32768
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z21.d, p0/z, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z21.d, p0/z, #32512
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z21.d, p15/m, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z21.d, p15/m, #-32768
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z21.h, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z21.h, #-32768
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z21.h, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z21.h, #32512
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z21.h, p0/z, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z21.h, p0/z, #-32768
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z21.h, p0/z, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z21.h, p0/z, #32512
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z21.h, p15/m, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z21.h, p15/m, #-32768
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z21.s, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z21.s, #-32768
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z21.s, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z21.s, #32512
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z21.s, p0/z, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z21.s, p0/z, #-32768
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z21.s, p0/z, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z21.s, p0/z, #32512
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z21.s, p15/m, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z21.s, p15/m, #-32768
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z31.b, p15/m, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z31.b, p7/m, b31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 movprfx z31, z6
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 0.50 mov z31.b, p7/m, wsp
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - mov z31.b, wsp
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z31.b, z31.b[63]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z31.d, p15/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z31.d, p7/m, d31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 movprfx z31.d, p7/z, z6.d
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 0.50 mov z31.d, p7/m, sp
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - mov z31.d, sp
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z31.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z31.d, z31.d[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z31.h, p15/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z31.h, p7/m, h31
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 0.50 mov z31.h, p7/m, wsp
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - mov z31.h, wsp
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z31.h, z31.h[31]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z31.s, p15/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z31.s, p7/m, s31
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - 0.50 0.50 mov z31.s, p7/m, wsp
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - mov z31.s, wsp
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z31.s, z31.s[15]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z5.b, #-1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z5.b, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z5.b, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z5.b, p0/z, #-1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z5.b, p0/z, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z5.b, p0/z, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z5.b, p15/m, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z5.d, #-6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z5.h, #-6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z5.q, z17.q[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 mov z5.s, #-6
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - movs p0.b, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - movs p0.b, p0/z, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - movs p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - movs p15.b, p15/z, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - mrs x3, ID_AA64ZFR0_EL1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - mrs x3, ZCR_EL1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - mrs x3, ZCR_EL12
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - mrs x3, ZCR_EL2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - mrs x3, ZCR_EL3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - msb z0.b, p7/m, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - msb z0.d, p7/m, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - msb z0.h, p7/m, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - msb z0.s, p7/m, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - msr ZCR_EL1, x3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - msr ZCR_EL12, x3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - msr ZCR_EL2, x3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - msr ZCR_EL3, x3
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - mul z0.b, p7/m, z0.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - mul z0.b, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - mul z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - mul z0.d, z1.d, z15.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - mul z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - mul z0.h, z1.h, z2.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - mul z0.h, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - mul z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - mul z0.s, z1.s, z7.s[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - mul z29.s, z30.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - mul z31.b, z31.b, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - mul z31.b, z31.b, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - mul z31.d, z31.d, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - mul z31.d, z31.d, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - mul z31.d, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - mul z31.h, z31.h, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - mul z31.h, z31.h, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - mul z31.s, z31.s, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - mul z31.s, z31.s, #127
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - nand p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - nand p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - nands p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - nands p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 nbsl z0.d, z0.d, z1.d, z2.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 neg z0.b, p0/m, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 neg z0.d, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 neg z0.h, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 neg z0.s, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 neg z31.b, p7/m, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 neg z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 neg z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 neg z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - 1.00 - nmatch p0.b, p0/z, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - 1.00 - nmatch p0.h, p0/z, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - 1.00 - nmatch p15.b, p7/z, z30.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - 1.00 - nmatch p15.h, p7/z, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - nor p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - nor p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - nors p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - nors p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - not p0.b, p0/z, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - not p15.b, p15/z, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 not z31.b, p7/m, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 not z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 not z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 not z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - nots p0.b, p0/z, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - nots p15.b, p15/z, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - orn p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - orn p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - orns p0.b, p0/z, p0.b, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - orns p15.b, p15/z, p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - orr p0.b, p0/z, p0.b, p1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 orr z0.d, z0.d, #0x6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 orr z0.d, z0.d, #0xfffffffffffffff9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 orr z0.s, z0.s, #0x6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 orr z0.s, z0.s, #0xfffffff9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 orr z23.d, z13.d, z8.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 orr z23.h, z23.h, #0x6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 orr z23.h, z23.h, #0xfff9
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 orr z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 orr z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 orr z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 orr z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 orr z5.b, z5.b, #0x6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 orr z5.b, z5.b, #0xf9
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - orrs p0.b, p0/z, p0.b, p1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 1.50 orv b0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 1.50 orv d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 1.50 orv h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 1.50 orv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - pfalse p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - pfirst p0.b, p15, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - pfirst p15.b, p15, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 pmul z0.b, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 pmul z29.b, z30.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 pmullb z0.h, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 pmullb z29.q, z30.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 pmullb z31.d, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 pmullt z0.h, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 pmullt z29.q, z30.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 pmullt z31.d, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - pnext p0.b, p15, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - pnext p0.d, p15, p0.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - pnext p0.h, p15, p0.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - pnext p0.s, p15, p0.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - pnext p15.b, p15, p15.b
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfb #14, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfb #15, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfb #6, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfb #7, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfb #7, p3, [z13.s, #31]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfb #7, p3, [z13.s]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfb pldl1keep, p0, [x0, z0.d, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfb pldl1keep, p0, [x0, z0.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfb pldl1keep, p0, [x0, z0.s, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfb pldl1keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfb pldl1strm, p0, [x0, #-32, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfb pldl1strm, p0, [x0, #31, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfb pldl1strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfb pldl2keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfb pldl2strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfb pldl3keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfb pldl3strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfb pldl3strm, p5, [x10, z21.d, sxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfb pldl3strm, p5, [x10, z21.s, uxtw]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfb pldl3strm, p5, [z10.d, #31]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfb pldl3strm, p5, [z10.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfb pstl1keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfb pstl1strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfb pstl2keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfb pstl2strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfb pstl3keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfb pstl3strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfd #14, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfd #15, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfd #15, p7, [z31.d, #248]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfd #15, p7, [z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfd #15, p7, [z31.s, #248]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfd #15, p7, [z31.s]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfd #6, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfd #7, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfd pldl1keep, p0, [x0, z0.d, lsl #3]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfd pldl1keep, p0, [x0, z0.d, sxtw #3]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfd pldl1keep, p0, [x0, z0.d, uxtw #3]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfd pldl1keep, p0, [x0, z0.s, sxtw #3]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfd pldl1keep, p0, [x0, z0.s, uxtw #3]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfd pldl1keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfd pldl1strm, p0, [x0, #-32, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfd pldl1strm, p0, [x0, #31, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfd pldl1strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfd pldl2keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfd pldl2strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfd pldl3keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfd pldl3strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfd pstl1keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfd pstl1strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfd pstl2keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfd pstl2strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfd pstl3keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfd pstl3strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfh #14, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfh #15, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfh #15, p7, [z31.d, #62]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfh #15, p7, [z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfh #15, p7, [z31.s, #62]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfh #15, p7, [z31.s]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfh #6, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfh #7, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfh pldl1keep, p0, [x0, z0.d, lsl #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfh pldl1keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfh pldl1strm, p0, [x0, #-32, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfh pldl1strm, p0, [x0, #31, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfh pldl1strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfh pldl2keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfh pldl2strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfh pldl3keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfh pldl3strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfh pldl3strm, p5, [x10, z21.d, sxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfh pldl3strm, p5, [x10, z21.d, uxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfh pldl3strm, p5, [x10, z21.s, sxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfh pldl3strm, p5, [x10, z21.s, uxtw #1]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfh pstl1keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfh pstl1strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfh pstl2keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfh pstl2strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfh pstl3keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfh pstl3strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfw #14, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfw #15, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfw #15, p7, [z31.d, #124]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfw #15, p7, [z31.d]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfw #15, p7, [z31.s, #124]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfw #15, p7, [z31.s]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfw #6, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfw #7, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfw #7, p3, [x13, z8.d, uxtw #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfw pldl1keep, p0, [x0, z0.d, sxtw #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfw pldl1keep, p0, [x0, z0.s, uxtw #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfw pldl1keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfw pldl1strm, p0, [x0, #-32, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfw pldl1strm, p0, [x0, #31, mul vl]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfw pldl1strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfw pldl2keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfw pldl2strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfw pldl3keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfw pldl3strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfw pldl3strm, p5, [x10, z21.d, lsl #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfw pldl3strm, p5, [x10, z21.s, sxtw #2]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfw pstl1keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfw pstl1strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfw pstl2keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfw pstl2strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfw pstl3keep, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - 0.33 0.33 0.33 - - - - - - - - - - - prfw pstl3strm, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptest p15, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptest p15, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrue p0.b, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrue p0.d, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrue p0.h, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrue p0.s, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrue p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrue p15.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrue p15.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrue p15.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrue p7.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrue p7.s, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrue p7.s, #15
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrue p7.s, #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrue p7.s, #17
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrue p7.s, #18
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrue p7.s, #19
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrue p7.s, #20
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrue p7.s, #21
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrue p7.s, #22
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrue p7.s, #23
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrue p7.s, #24
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrue p7.s, #25
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrue p7.s, #26
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrue p7.s, #27
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrue p7.s, #28
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrue p7.s, mul3
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrue p7.s, mul4
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrue p7.s, vl1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrue p7.s, vl128
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrue p7.s, vl16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrue p7.s, vl2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrue p7.s, vl256
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrue p7.s, vl3
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrue p7.s, vl32
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrue p7.s, vl4
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrue p7.s, vl5
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrue p7.s, vl6
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrue p7.s, vl64
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrue p7.s, vl7
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrue p7.s, vl8
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrues p0.b, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrues p0.d, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrues p0.h, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrues p0.s, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrues p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrues p15.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrues p15.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrues p15.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrues p7.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrues p7.s, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrues p7.s, #15
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrues p7.s, #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrues p7.s, #17
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrues p7.s, #18
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrues p7.s, #19
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrues p7.s, #20
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrues p7.s, #21
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrues p7.s, #22
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrues p7.s, #23
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrues p7.s, #24
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrues p7.s, #25
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrues p7.s, #26
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrues p7.s, #27
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrues p7.s, #28
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrues p7.s, mul3
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrues p7.s, mul4
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrues p7.s, vl1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrues p7.s, vl128
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrues p7.s, vl16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrues p7.s, vl2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrues p7.s, vl256
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrues p7.s, vl3
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrues p7.s, vl32
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrues p7.s, vl4
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrues p7.s, vl5
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrues p7.s, vl6
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrues p7.s, vl64
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrues p7.s, vl7
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - ptrues p7.s, vl8
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - punpkhi p0.h, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - punpkhi p15.h, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - punpklo p0.h, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - punpklo p15.h, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 raddhnb z0.b, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 raddhnb z0.h, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 raddhnb z0.s, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 raddhnt z0.b, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 raddhnt z0.h, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 raddhnt z0.s, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rax1 z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rbit z0.b, p7/m, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rbit z0.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rbit z0.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rbit z0.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - rdffr p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - - - rdffr p0.b, p0/z
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - rdffr p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - - - rdffr p15.b, p15/z
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - - - rdffrs p0.b, p0/z
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - - - rdffrs p15.b, p15/z
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - rdvl x0, #0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - rdvl x21, #-32
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - rdvl x23, #31
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - rdvl xzr, #-1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - rev p0.b, p1.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - rev p0.d, p1.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - rev p0.h, p1.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - rev p0.s, p1.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rev z0.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rev z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rev z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rev z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 revb z0.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 revb z0.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 revb z0.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 revh z0.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 revh z0.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 revw z0.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rshrnb z0.b, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rshrnb z0.h, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rshrnb z0.s, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rshrnb z31.b, z31.h, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rshrnb z31.h, z31.s, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rshrnb z31.s, z31.d, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rshrnt z0.b, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rshrnt z0.h, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rshrnt z0.s, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rshrnt z31.b, z31.h, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rshrnt z31.h, z31.s, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rshrnt z31.s, z31.d, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rsubhnb z0.b, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rsubhnb z0.h, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rsubhnb z0.s, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rsubhnt z0.b, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rsubhnt z0.h, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 rsubhnt z0.s, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 saba z0.b, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 saba z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 saba z0.h, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 saba z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sabalb z0.d, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sabalb z0.h, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sabalb z0.s, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sabalt z0.d, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sabalt z0.h, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sabalt z0.s, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sabd z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sabd z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sabd z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sabd z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sabdlb z0.h, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sabdlb z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sabdlb z31.d, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sabdlt z0.h, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sabdlt z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sabdlt z31.d, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sadalp z0.h, p0/m, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sadalp z29.s, p0/m, z30.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sadalp z30.d, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 saddlb z0.h, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 saddlb z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 saddlb z31.d, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 saddlbt z0.d, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 saddlbt z0.h, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 saddlbt z0.s, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 saddlt z0.h, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 saddlt z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 saddlt z31.d, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 5.00 saddv d0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 3.00 saddv d0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 3.00 saddv d0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 saddwb z0.h, z1.h, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 saddwb z29.s, z30.s, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 saddwb z31.d, z31.d, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 saddwt z0.h, z1.h, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 saddwt z29.s, z30.s, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 saddwt z31.d, z31.d, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sbclb z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sbclb z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sbclt z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sbclt z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - scvtf z0.d, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - scvtf z0.d, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - scvtf z0.h, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - scvtf z0.h, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - scvtf z0.h, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - scvtf z0.s, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - scvtf z0.s, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 20.00 - sdiv z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 11.00 - sdiv z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 20.00 - sdivr z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 11.00 - sdivr z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sdot z0.d, z1.h, z15.h[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sdot z0.d, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sdot z0.s, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sdot z0.s, z1.b, z7.b[3]
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sel p0.b, p1, p2.b, p3.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sel z23.b, p11, z13.b, z8.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sel z23.d, p11, z13.d, z8.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sel z23.h, p11, z13.h, z8.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sel z23.s, p11, z13.s, z8.s
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - setffr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shadd z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shadd z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shadd z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shadd z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shrnb z0.b, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shrnb z0.h, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shrnb z0.s, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shrnb z31.b, z31.h, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shrnb z31.h, z31.s, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shrnb z31.s, z31.d, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shrnt z0.b, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shrnt z0.h, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shrnt z0.s, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shrnt z31.b, z31.h, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shrnt z31.h, z31.s, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shrnt z31.s, z31.d, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shsub z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shsub z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shsub z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shsub z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shsubr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shsubr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shsubr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 shsubr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sli z0.b, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sli z0.d, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sli z0.h, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sli z0.s, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sli z31.b, z31.b, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sli z31.d, z31.d, #63
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sli z31.h, z31.h, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sli z31.s, z31.s, #31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sm4e z0.s, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sm4ekey z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 smax z0.b, z0.b, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 smax z0.d, z0.d, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 smax z0.h, z0.h, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 smax z0.s, z0.s, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 smax z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 smax z31.b, z31.b, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 smax z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 smax z31.d, z31.d, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 smax z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 smax z31.h, z31.h, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 smax z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 smax z31.s, z31.s, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 smaxp z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 smaxp z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 smaxp z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 smaxp z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 5.00 smaxv b0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 smaxv d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 3.00 smaxv h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 3.00 smaxv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 smin z0.b, z0.b, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 smin z0.d, z0.d, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 smin z0.h, z0.h, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 smin z0.s, z0.s, #-128
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 smin z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 smin z31.b, z31.b, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 smin z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 smin z31.d, z31.d, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 smin z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 smin z31.h, z31.h, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 smin z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 smin z31.s, z31.s, #127
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sminp z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sminp z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sminp z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sminp z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 5.00 sminv b0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 sminv d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 3.00 sminv h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 3.00 sminv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smlalb z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smlalb z0.d, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smlalb z0.h, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smlalb z0.s, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smlalb z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smlalt z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smlalt z0.d, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smlalt z0.h, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smlalt z0.s, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smlalt z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smlslb z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smlslb z0.d, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smlslb z0.h, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smlslb z0.s, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smlslb z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smlslt z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smlslt z0.d, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smlslt z0.h, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smlslt z0.s, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smlslt z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 smmla z0.s, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smulh z0.b, p7/m, z0.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smulh z0.b, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - smulh z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smulh z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smulh z0.h, z1.h, z2.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smulh z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smulh z29.s, z30.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - smulh z31.d, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smullb z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smullb z0.h, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smullb z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smullb z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smullb z31.d, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smullt z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smullt z0.h, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smullt z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smullt z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - smullt z31.d, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 splice z29.b, p7, { z30.b, z31.b }
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 splice z29.d, p7, { z30.d, z31.d }
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 splice z29.h, p7, { z30.h, z31.h }
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 splice z29.s, p7, { z30.s, z31.s }
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 splice z31.b, p7, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 splice z31.d, p7, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 splice z31.h, p7, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 splice z31.s, p7, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqabs z31.b, p7/m, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqabs z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqabs z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqabs z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqadd z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqadd z0.b, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqadd z0.b, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqadd z0.d, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqadd z0.d, z0.d, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqadd z0.d, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqadd z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqadd z0.h, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqadd z0.h, z0.h, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqadd z0.h, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqadd z0.s, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqadd z0.s, z0.s, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqadd z0.s, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqadd z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqadd z31.b, z31.b, #255
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqadd z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqadd z31.d, z31.d, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqadd z31.h, z31.h, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqadd z31.s, z31.s, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqcadd z0.b, z0.b, z0.b, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqcadd z0.d, z0.d, z0.d, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqcadd z0.h, z0.h, z0.h, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqcadd z0.s, z0.s, z0.s, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqcadd z31.b, z31.b, z31.b, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqcadd z31.d, z31.d, z31.d, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqcadd z31.h, z31.h, z31.h, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqcadd z31.s, z31.s, z31.s, #270
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdecb x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdecb x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdecb x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdecb x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdecb x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdecb x0, w0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdecb x0, w0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdecb x0, w0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdecb x0, w0, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdecd x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdecd x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdecd x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdecd x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdecd x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdecd x0, w0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdecd x0, w0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdecd x0, w0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdecd x0, w0, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqdecd z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqdecd z0.d, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqdecd z0.d, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqdecd z0.d, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdech x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdech x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdech x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdech x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdech x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdech x0, w0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdech x0, w0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdech x0, w0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdech x0, w0, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqdech z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqdech z0.h, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqdech z0.h, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqdech z0.h, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdecp x0, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdecp x0, p0.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdecp x0, p0.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdecp x0, p0.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdecp xzr, p15.b, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdecp xzr, p15.d, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdecp xzr, p15.h, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdecp xzr, p15.s, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - 0.50 0.50 sqdecp z0.d, p0.d
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - 0.50 0.50 sqdecp z0.h, p0.h
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - 0.50 0.50 sqdecp z0.s, p0.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdecw x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdecw x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdecw x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdecw x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdecw x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdecw x0, w0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdecw x0, w0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdecw x0, w0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqdecw x0, w0, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqdecw z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqdecw z0.s, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqdecw z0.s, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqdecw z0.s, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmlalb z0.d, z1.s, z15.s[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmlalb z0.d, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmlalb z0.h, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmlalb z0.s, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmlalb z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmlalbt z0.d, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmlalbt z0.h, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmlalbt z0.s, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmlalt z0.d, z1.s, z15.s[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmlalt z0.d, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmlalt z0.h, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmlalt z0.s, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmlalt z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmlslb z0.d, z1.s, z15.s[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmlslb z0.d, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmlslb z0.h, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmlslb z0.s, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmlslb z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmlslbt z0.d, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmlslbt z0.h, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmlslbt z0.s, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmlslt z0.d, z1.s, z15.s[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmlslt z0.d, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmlslt z0.h, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmlslt z0.s, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmlslt z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmulh z0.b, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - sqdmulh z0.d, z1.d, z15.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmulh z0.h, z1.h, z2.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmulh z0.h, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmulh z0.s, z1.s, z7.s[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmulh z29.s, z30.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - sqdmulh z31.d, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmullb z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmullb z0.h, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmullb z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmullb z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmullb z31.d, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmullt z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmullt z0.h, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmullt z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmullt z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqdmullt z31.d, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqincb x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqincb x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqincb x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqincb x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqincb x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqincb x0, w0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqincb x0, w0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqincb x0, w0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqincb x0, w0, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqincd x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqincd x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqincd x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqincd x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqincd x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqincd x0, w0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqincd x0, w0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqincd x0, w0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqincd x0, w0, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqincd z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqincd z0.d, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqincd z0.d, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqincd z0.d, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqinch x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqinch x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqinch x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqinch x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqinch x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqinch x0, w0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqinch x0, w0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqinch x0, w0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqinch x0, w0, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqinch z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqinch z0.h, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqinch z0.h, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqinch z0.h, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqincp x0, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqincp x0, p0.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqincp x0, p0.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqincp x0, p0.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqincp xzr, p15.b, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqincp xzr, p15.d, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqincp xzr, p15.h, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqincp xzr, p15.s, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - 0.50 0.50 sqincp z0.d, p0.d
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - 0.50 0.50 sqincp z0.h, p0.h
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - 0.50 0.50 sqincp z0.s, p0.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqincw x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqincw x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqincw x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqincw x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqincw x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqincw x0, w0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqincw x0, w0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqincw x0, w0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - sqincw x0, w0, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqincw z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqincw z0.s, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqincw z0.s, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqincw z0.s, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqneg z31.b, p7/m, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqneg z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqneg z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqneg z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqrdcmlah z0.b, z1.b, z2.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - sqrdcmlah z0.d, z1.d, z2.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqrdcmlah z0.h, z1.h, z2.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqrdcmlah z0.h, z1.h, z2.h[0], #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqrdcmlah z0.s, z1.s, z2.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqrdcmlah z0.s, z1.s, z2.s[0], #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqrdcmlah z15.b, z16.b, z17.b, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - sqrdcmlah z15.d, z16.d, z17.d, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqrdcmlah z15.h, z16.h, z17.h, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqrdcmlah z15.s, z16.s, z17.s, #270
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqrdcmlah z29.b, z30.b, z31.b, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - sqrdcmlah z29.d, z30.d, z31.d, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqrdcmlah z29.h, z30.h, z31.h, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqrdcmlah z29.s, z30.s, z31.s, #90
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqrdcmlah z31.b, z31.b, z31.b, #180
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - sqrdcmlah z31.d, z31.d, z31.d, #180
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqrdcmlah z31.h, z30.h, z7.h[0], #180
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqrdcmlah z31.h, z31.h, z31.h, #180
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqrdcmlah z31.s, z30.s, z7.s[0], #180
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqrdcmlah z31.s, z31.s, z31.s, #180
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqrdmlah z0.b, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - sqrdmlah z0.d, z1.d, z15.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - sqrdmlah z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqrdmlah z0.h, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqrdmlah z0.h, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqrdmlah z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqrdmlah z0.s, z1.s, z7.s[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqrdmlsh z0.b, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - sqrdmlsh z0.d, z1.d, z15.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - sqrdmlsh z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqrdmlsh z0.h, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqrdmlsh z0.h, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqrdmlsh z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqrdmlsh z0.s, z1.s, z7.s[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqrdmulh z0.b, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - sqrdmulh z0.d, z1.d, z15.d[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqrdmulh z0.h, z1.h, z2.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqrdmulh z0.h, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqrdmulh z0.s, z1.s, z7.s[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - sqrdmulh z29.s, z30.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - sqrdmulh z31.d, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshl z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshl z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshl z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshl z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshlr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshlr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshlr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshlr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshrnb z0.b, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshrnb z0.h, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshrnb z0.s, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshrnb z31.b, z31.h, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshrnb z31.h, z31.s, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshrnb z31.s, z31.d, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshrnt z0.b, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshrnt z0.h, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshrnt z0.s, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshrnt z31.b, z31.h, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshrnt z31.h, z31.s, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshrnt z31.s, z31.d, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshrunb z0.b, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshrunb z0.h, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshrunb z0.s, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshrunb z31.b, z31.h, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshrunb z31.h, z31.s, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshrunb z31.s, z31.d, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshrunt z0.b, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshrunt z0.h, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshrunt z0.s, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshrunt z31.b, z31.h, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshrunt z31.h, z31.s, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqrshrunt z31.s, z31.d, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshl z0.b, p0/m, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshl z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshl z0.d, p0/m, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshl z0.h, p0/m, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshl z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshl z0.s, p0/m, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshl z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshl z31.b, p0/m, z31.b, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshl z31.d, p0/m, z31.d, #63
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshl z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshl z31.h, p0/m, z31.h, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshl z31.s, p0/m, z31.s, #31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshlr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshlr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshlr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshlr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshlu z0.b, p0/m, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshlu z0.d, p0/m, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshlu z0.h, p0/m, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshlu z0.s, p0/m, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshlu z31.b, p0/m, z31.b, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshlu z31.d, p0/m, z31.d, #63
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshlu z31.h, p0/m, z31.h, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshlu z31.s, p0/m, z31.s, #31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshrnb z0.b, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshrnb z0.h, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshrnb z0.s, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshrnb z31.b, z31.h, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshrnb z31.h, z31.s, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshrnb z31.s, z31.d, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshrnt z0.b, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshrnt z0.h, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshrnt z0.s, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshrnt z31.b, z31.h, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshrnt z31.h, z31.s, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshrnt z31.s, z31.d, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshrunb z0.b, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshrunb z0.h, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshrunb z0.s, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshrunb z31.b, z31.h, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshrunb z31.h, z31.s, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshrunb z31.s, z31.d, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshrunt z0.b, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshrunt z0.h, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshrunt z0.s, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshrunt z31.b, z31.h, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshrunt z31.h, z31.s, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqshrunt z31.s, z31.d, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqsub z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqsub z0.b, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqsub z0.b, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqsub z0.d, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqsub z0.d, z0.d, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqsub z0.d, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqsub z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqsub z0.h, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqsub z0.h, z0.h, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqsub z0.h, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqsub z0.s, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqsub z0.s, z0.s, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqsub z0.s, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqsub z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqsub z31.b, z31.b, #255
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqsub z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqsub z31.d, z31.d, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqsub z31.h, z31.h, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqsub z31.s, z31.s, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqsubr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqsubr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqsubr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqsubr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqxtnb z0.b, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqxtnb z0.h, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqxtnb z0.s, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqxtnt z0.b, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqxtnt z0.h, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqxtnt z0.s, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqxtunb z0.b, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqxtunb z0.h, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqxtunb z0.s, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqxtunt z0.b, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqxtunt z0.h, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sqxtunt z0.s, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srhadd z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srhadd z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srhadd z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srhadd z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sri z0.b, z0.b, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sri z0.d, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sri z0.h, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sri z0.s, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sri z31.b, z31.b, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sri z31.d, z31.d, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sri z31.h, z31.h, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sri z31.s, z31.s, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srshl z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srshl z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srshl z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srshl z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srshlr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srshlr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srshlr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srshlr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srshr z0.b, p0/m, z0.b, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srshr z0.d, p0/m, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srshr z0.h, p0/m, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srshr z0.s, p0/m, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srshr z31.b, p0/m, z31.b, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srshr z31.d, p0/m, z31.d, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srshr z31.h, p0/m, z31.h, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 srshr z31.s, p0/m, z31.s, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 srsra z0.b, z0.b, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 srsra z0.d, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 srsra z0.h, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 srsra z0.s, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 srsra z31.b, z31.b, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 srsra z31.d, z31.d, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 srsra z31.h, z31.h, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 srsra z31.s, z31.s, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sshllb z0.d, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sshllb z0.h, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sshllb z0.s, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sshllb z31.d, z31.s, #31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sshllb z31.h, z31.b, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sshllb z31.s, z31.h, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sshllt z0.d, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sshllt z0.h, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sshllt z0.s, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sshllt z31.d, z31.s, #31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sshllt z31.h, z31.b, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sshllt z31.s, z31.h, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 ssra z0.b, z0.b, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 ssra z0.d, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 ssra z0.h, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 ssra z0.s, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 ssra z31.b, z31.b, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 ssra z31.d, z31.d, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 ssra z31.h, z31.h, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 ssra z31.s, z31.s, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ssublb z0.h, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ssublb z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ssublb z31.d, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ssublbt z0.d, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ssublbt z0.h, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ssublbt z0.s, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ssublt z0.h, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ssublt z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ssublt z31.d, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ssubltb z0.d, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ssubltb z0.h, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ssubltb z0.s, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ssubwb z0.h, z1.h, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ssubwb z29.s, z30.s, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ssubwb z31.d, z31.d, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ssubwt z0.h, z1.h, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ssubwt z29.s, z30.s, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ssubwt z31.d, z31.d, z31.s
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st1b { z0.b }, p0, [x0, x0]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st1b { z0.b }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st1b { z0.d }, p0, [x0, x0]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 st1b { z0.d }, p0, [x0, z0.d, sxtw]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 st1b { z0.d }, p0, [x0, z0.d, uxtw]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 st1b { z0.d }, p0, [x0, z0.d]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st1b { z0.d }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 st1b { z0.d }, p7, [z0.d]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st1b { z0.h }, p0, [x0, x0]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st1b { z0.h }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st1b { z0.s }, p0, [x0, x0]
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - - - - - 2.00 2.00 2.00 st1b { z0.s }, p0, [x0, z0.s, sxtw]
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - - - - - 2.00 2.00 2.00 st1b { z0.s }, p0, [x0, z0.s, uxtw]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st1b { z0.s }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - - - - - 2.00 2.00 2.00 st1b { z0.s }, p7, [z0.s]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st1b { z21.b }, p5, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st1b { z21.d }, p5, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st1b { z21.h }, p5, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st1b { z21.s }, p5, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st1b { z31.b }, p7, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st1b { z31.d }, p7, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 st1b { z31.d }, p7, [z31.d, #31]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st1b { z31.h }, p7, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st1b { z31.s }, p7, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - - - - - 2.00 2.00 2.00 st1b { z31.s }, p7, [z31.s, #31]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st1d { z0.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 st1d { z0.d }, p0, [x0, z0.d, lsl #3]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 st1d { z0.d }, p0, [x0, z0.d, sxtw #3]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 st1d { z0.d }, p0, [x0, z0.d, sxtw]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 st1d { z0.d }, p0, [x0, z0.d, uxtw #3]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 st1d { z0.d }, p0, [x0, z0.d, uxtw]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 st1d { z0.d }, p0, [x0, z0.d]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st1d { z0.d }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 st1d { z0.d }, p7, [z0.d]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st1d { z21.d }, p5, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st1d { z31.d }, p7, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 st1d { z31.d }, p7, [z31.d, #248]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 0.50 0.50 st1h { z0.d }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 st1h { z0.d }, p0, [x0, z0.d, lsl #1]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 st1h { z0.d }, p0, [x0, z0.d, sxtw #1]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 st1h { z0.d }, p0, [x0, z0.d, sxtw]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 st1h { z0.d }, p0, [x0, z0.d, uxtw #1]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 st1h { z0.d }, p0, [x0, z0.d, uxtw]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 st1h { z0.d }, p0, [x0, z0.d]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st1h { z0.d }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 st1h { z0.d }, p7, [z0.d]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 0.50 0.50 st1h { z0.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st1h { z0.h }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 0.50 0.50 st1h { z0.s }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - - - - - 2.00 2.00 2.00 st1h { z0.s }, p0, [x0, z0.s, sxtw #1]
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - - - - - 2.00 2.00 2.00 st1h { z0.s }, p0, [x0, z0.s, sxtw]
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - - - - - 2.00 2.00 2.00 st1h { z0.s }, p0, [x0, z0.s, uxtw #1]
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - - - - - 2.00 2.00 2.00 st1h { z0.s }, p0, [x0, z0.s, uxtw]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st1h { z0.s }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - - - - - 2.00 2.00 2.00 st1h { z0.s }, p7, [z0.s]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st1h { z21.d }, p5, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st1h { z21.h }, p5, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st1h { z21.s }, p5, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st1h { z31.d }, p7, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 st1h { z31.d }, p7, [z31.d, #62]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st1h { z31.h }, p7, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st1h { z31.s }, p7, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - - - - - 2.00 2.00 2.00 st1h { z31.s }, p7, [z31.s, #62]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st1w { z0.d }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 st1w { z0.d }, p0, [x0, z0.d, lsl #2]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 st1w { z0.d }, p0, [x0, z0.d, sxtw #2]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 st1w { z0.d }, p0, [x0, z0.d, sxtw]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 st1w { z0.d }, p0, [x0, z0.d, uxtw #2]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 st1w { z0.d }, p0, [x0, z0.d, uxtw]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 st1w { z0.d }, p0, [x0, z0.d]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st1w { z0.d }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 st1w { z0.d }, p7, [z0.d]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st1w { z0.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - - - - - 2.00 2.00 2.00 st1w { z0.s }, p0, [x0, z0.s, sxtw #2]
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - - - - - 2.00 2.00 2.00 st1w { z0.s }, p0, [x0, z0.s, sxtw]
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - - - - - 2.00 2.00 2.00 st1w { z0.s }, p0, [x0, z0.s, uxtw #2]
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - - - - - 2.00 2.00 2.00 st1w { z0.s }, p0, [x0, z0.s, uxtw]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st1w { z0.s }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - - - - - 2.00 2.00 2.00 st1w { z0.s }, p7, [z0.s]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st1w { z21.d }, p5, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st1w { z21.s }, p5, [x10, #5, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st1w { z31.d }, p7, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 st1w { z31.d }, p7, [z31.d, #124]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st1w { z31.s }, p7, [sp, #-1, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - - - - - 2.00 2.00 2.00 st1w { z31.s }, p7, [z31.s, #124]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 st2b { z0.b, z1.b }, p0, [x0, x0]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st2b { z0.b, z1.b }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st2b { z21.b, z22.b }, p5, [x10, #10, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st2b { z23.b, z24.b }, p3, [x13, #-16, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 st2b { z5.b, z6.b }, p3, [x17, x16]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 st2d { z0.d, z1.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st2d { z0.d, z1.d }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st2d { z21.d, z22.d }, p5, [x10, #10, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st2d { z23.d, z24.d }, p3, [x13, #-16, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 st2d { z5.d, z6.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 1.00 1.00 1.00 st2h { z0.h, z1.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st2h { z0.h, z1.h }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st2h { z21.h, z22.h }, p5, [x10, #10, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st2h { z23.h, z24.h }, p3, [x13, #-16, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25 1.00 1.00 1.00 st2h { z5.h, z6.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 st2w { z0.s, z1.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st2w { z0.s, z1.s }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st2w { z21.s, z22.s }, p5, [x10, #10, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 st2w { z23.s, z24.s }, p3, [x13, #-16, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 st2w { z5.s, z6.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT: - - - - - - - - - - - 4.50 1.13 1.13 1.13 1.13 1.13 1.13 1.13 1.13 4.50 4.50 4.50 st3b { z0.b - z2.b }, p0, [x0, x0]
+# CHECK-NEXT: - - - - - - - - - - - 4.50 - - - - - - - - 4.50 4.50 4.50 st3b { z0.b - z2.b }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 4.50 - - - - - - - - 4.50 4.50 4.50 st3b { z21.b - z23.b }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 4.50 - - - - - - - - 4.50 4.50 4.50 st3b { z23.b - z25.b }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 4.50 1.13 1.13 1.13 1.13 1.13 1.13 1.13 1.13 4.50 4.50 4.50 st3b { z5.b - z7.b }, p3, [x17, x16]
+# CHECK-NEXT: - - - - - - - - - - - 4.50 1.13 1.13 1.13 1.13 1.13 1.13 1.13 1.13 4.50 4.50 4.50 st3d { z0.d - z2.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT: - - - - - - - - - - - 4.50 - - - - - - - - 4.50 4.50 4.50 st3d { z0.d - z2.d }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 4.50 - - - - - - - - 4.50 4.50 4.50 st3d { z21.d - z23.d }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 4.50 - - - - - - - - 4.50 4.50 4.50 st3d { z23.d - z25.d }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 4.50 1.13 1.13 1.13 1.13 1.13 1.13 1.13 1.13 4.50 4.50 4.50 st3d { z5.d - z7.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT: - - - - - - - - - - - 4.50 1.13 1.13 1.13 1.13 1.13 1.13 1.13 1.13 4.50 4.50 4.50 st3h { z0.h - z2.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - - - - 4.50 - - - - - - - - 4.50 4.50 4.50 st3h { z0.h - z2.h }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 4.50 - - - - - - - - 4.50 4.50 4.50 st3h { z21.h - z23.h }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 4.50 - - - - - - - - 4.50 4.50 4.50 st3h { z23.h - z25.h }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 4.50 1.13 1.13 1.13 1.13 1.13 1.13 1.13 1.13 4.50 4.50 4.50 st3h { z5.h - z7.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT: - - - - - - - - - - - 4.50 1.13 1.13 1.13 1.13 1.13 1.13 1.13 1.13 4.50 4.50 4.50 st3w { z0.s - z2.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: - - - - - - - - - - - 4.50 - - - - - - - - 4.50 4.50 4.50 st3w { z0.s - z2.s }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 4.50 - - - - - - - - 4.50 4.50 4.50 st3w { z21.s - z23.s }, p5, [x10, #15, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 4.50 - - - - - - - - 4.50 4.50 4.50 st3w { z23.s - z25.s }, p3, [x13, #-24, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 4.50 1.13 1.13 1.13 1.13 1.13 1.13 1.13 1.13 4.50 4.50 4.50 st3w { z5.s - z7.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT: - - - - - - - - - - - 9.00 2.25 2.25 2.25 2.25 2.25 2.25 2.25 2.25 9.00 9.00 9.00 st4b { z0.b - z3.b }, p0, [x0, x0]
+# CHECK-NEXT: - - - - - - - - - - - 9.00 - - - - - - - - 9.00 9.00 9.00 st4b { z0.b - z3.b }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 9.00 - - - - - - - - 9.00 9.00 9.00 st4b { z21.b - z24.b }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 9.00 - - - - - - - - 9.00 9.00 9.00 st4b { z23.b - z26.b }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 9.00 2.25 2.25 2.25 2.25 2.25 2.25 2.25 2.25 9.00 9.00 9.00 st4b { z5.b - z8.b }, p3, [x17, x16]
+# CHECK-NEXT: - - - - - - - - - - - 9.00 2.25 2.25 2.25 2.25 2.25 2.25 2.25 2.25 9.00 9.00 9.00 st4d { z0.d - z3.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT: - - - - - - - - - - - 9.00 - - - - - - - - 9.00 9.00 9.00 st4d { z0.d - z3.d }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 9.00 - - - - - - - - 9.00 9.00 9.00 st4d { z21.d - z24.d }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 9.00 - - - - - - - - 9.00 9.00 9.00 st4d { z23.d - z26.d }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 9.00 2.25 2.25 2.25 2.25 2.25 2.25 2.25 2.25 9.00 9.00 9.00 st4d { z5.d - z8.d }, p3, [x17, x16, lsl #3]
+# CHECK-NEXT: - - - - - - - - - - - 9.00 2.25 2.25 2.25 2.25 2.25 2.25 2.25 2.25 9.00 9.00 9.00 st4h { z0.h - z3.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - - - - 9.00 - - - - - - - - 9.00 9.00 9.00 st4h { z0.h - z3.h }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 9.00 - - - - - - - - 9.00 9.00 9.00 st4h { z21.h - z24.h }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 9.00 - - - - - - - - 9.00 9.00 9.00 st4h { z23.h - z26.h }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 9.00 2.25 2.25 2.25 2.25 2.25 2.25 2.25 2.25 9.00 9.00 9.00 st4h { z5.h - z8.h }, p3, [x17, x16, lsl #1]
+# CHECK-NEXT: - - - - - - - - - - - 9.00 2.25 2.25 2.25 2.25 2.25 2.25 2.25 2.25 9.00 9.00 9.00 st4w { z0.s - z3.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: - - - - - - - - - - - 9.00 - - - - - - - - 9.00 9.00 9.00 st4w { z0.s - z3.s }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 9.00 - - - - - - - - 9.00 9.00 9.00 st4w { z21.s - z24.s }, p5, [x10, #20, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 9.00 - - - - - - - - 9.00 9.00 9.00 st4w { z23.s - z26.s }, p3, [x13, #-32, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 9.00 2.25 2.25 2.25 2.25 2.25 2.25 2.25 2.25 9.00 9.00 9.00 st4w { z5.s - z8.s }, p3, [x17, x16, lsl #2]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 stnt1b { z0.b }, p0, [x0, x0]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 stnt1b { z0.b }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 stnt1b { z0.d }, p0, [z1.d]
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - - - - - 2.00 2.00 2.00 stnt1b { z0.s }, p0, [z1.s]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 stnt1b { z21.b }, p5, [x10, #7, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 stnt1b { z23.b }, p3, [x13, #-8, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 stnt1b { z31.d }, p7, [z31.d, x0]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 stnt1b { z31.d }, p7, [z31.d]
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - - - - - 2.00 2.00 2.00 stnt1b { z31.s }, p7, [z31.s, x0]
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - - - - - 2.00 2.00 2.00 stnt1b { z31.s }, p7, [z31.s]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 stnt1d { z0.d }, p0, [x0, x0, lsl #3]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 stnt1d { z0.d }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 stnt1d { z0.d }, p0, [z1.d]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 stnt1d { z21.d }, p5, [x10, #7, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 stnt1d { z23.d }, p3, [x13, #-8, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 stnt1d { z31.d }, p7, [z31.d, x0]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 stnt1d { z31.d }, p7, [z31.d]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 stnt1h { z0.d }, p0, [z1.d]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.13 0.50 0.50 0.50 stnt1h { z0.h }, p0, [x0, x0, lsl #1]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 stnt1h { z0.h }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - - - - - 2.00 2.00 2.00 stnt1h { z0.s }, p0, [z1.s]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 stnt1h { z21.h }, p5, [x10, #7, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 stnt1h { z23.h }, p3, [x13, #-8, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 stnt1h { z31.d }, p7, [z31.d, x0]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 stnt1h { z31.d }, p7, [z31.d]
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - - - - - 2.00 2.00 2.00 stnt1h { z31.s }, p7, [z31.s, x0]
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - - - - - 2.00 2.00 2.00 stnt1h { z31.s }, p7, [z31.s]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 stnt1w { z0.d }, p0, [z1.d]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 stnt1w { z0.s }, p0, [x0, x0, lsl #2]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 stnt1w { z0.s }, p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - - - - - 2.00 2.00 2.00 stnt1w { z0.s }, p0, [z1.s]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 stnt1w { z21.s }, p5, [x10, #7, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 stnt1w { z23.s }, p3, [x13, #-8, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 stnt1w { z31.d }, p7, [z31.d, x0]
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - 1.00 1.00 1.00 stnt1w { z31.d }, p7, [z31.d]
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - - - - - 2.00 2.00 2.00 stnt1w { z31.s }, p7, [z31.s, x0]
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - - - - - 2.00 2.00 2.00 stnt1w { z31.s }, p7, [z31.s]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 - - str p0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 - - str p15, [sp, #-256, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 - - str p5, [x10, #255, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 str z0, [x0]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 str z21, [x10, #-256, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - - - - - - - - 0.50 0.50 0.50 str z31, [sp, #255, mul vl]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z0.b, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z0.b, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z0.d, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z0.d, z0.d, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z0.d, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z0.h, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z0.h, z0.h, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z0.h, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z0.s, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z0.s, z0.s, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z0.s, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z21.b, p5/m, z21.b, z10.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z21.b, z10.b, z21.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z21.d, p5/m, z21.d, z10.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z21.d, z10.d, z21.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z21.h, p5/m, z21.h, z10.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z21.h, z10.h, z21.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z21.s, p5/m, z21.s, z10.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z21.s, z10.s, z21.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z23.b, p3/m, z23.b, z13.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z23.b, z13.b, z8.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z23.d, p3/m, z23.d, z13.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z23.d, z13.d, z8.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z23.h, p3/m, z23.h, z13.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z23.h, z13.h, z8.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z23.s, p3/m, z23.s, z13.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z23.s, z13.s, z8.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z31.b, z31.b, #255
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z31.b, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z31.d, z31.d, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z31.d, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z31.h, z31.h, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z31.h, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z31.s, z31.s, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sub z31.s, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 subhnb z0.b, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 subhnb z0.h, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 subhnb z0.s, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 subhnt z0.b, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 subhnt z0.h, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 subhnt z0.s, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 subr z0.b, p0/m, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 subr z0.b, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 subr z0.d, p0/m, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 subr z0.d, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 subr z0.d, z0.d, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 subr z0.h, p0/m, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 subr z0.h, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 subr z0.h, z0.h, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 subr z0.s, p0/m, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 subr z0.s, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 subr z0.s, z0.s, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 subr z31.b, z31.b, #255
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 subr z31.d, z31.d, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 subr z31.h, z31.h, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 subr z31.s, z31.s, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sunpkhi z31.d, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sunpkhi z31.h, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sunpkhi z31.s, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sunpklo z31.d, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sunpklo z31.h, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sunpklo z31.s, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 suqadd z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 suqadd z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 suqadd z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 suqadd z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sxtb z0.d, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sxtb z0.h, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sxtb z0.s, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sxtb z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sxtb z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sxtb z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sxth z0.d, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sxth z0.s, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sxth z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sxth z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sxtw z0.d, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 sxtw z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 tbl z28.b, { z29.b, z30.b }, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 tbl z28.d, { z29.d, z30.d }, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 tbl z28.h, { z29.h, z30.h }, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 tbl z28.s, { z29.s, z30.s }, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 tbl z31.b, { z31.b }, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 tbl z31.d, { z31.d }, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 tbl z31.h, { z31.h }, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 tbl z31.s, { z31.s }, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 tbx z31.b, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 tbx z31.d, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 tbx z31.h, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 tbx z31.s, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - trn1 p15.b, p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - trn1 p15.d, p15.d, p15.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - trn1 p15.h, p15.h, p15.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - trn1 p15.s, p15.s, p15.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 trn1 z31.b, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 trn1 z31.d, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 trn1 z31.h, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 trn1 z31.s, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - trn2 p15.b, p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - trn2 p15.d, p15.d, p15.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - trn2 p15.h, p15.h, p15.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - trn2 p15.s, p15.s, p15.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 trn2 z31.b, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 trn2 z31.d, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 trn2 z31.h, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 trn2 z31.s, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uaba z0.b, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uaba z0.d, z1.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uaba z0.h, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uaba z0.s, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uabalb z0.d, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uabalb z0.h, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uabalb z0.s, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uabalt z0.d, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uabalt z0.h, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uabalt z0.s, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uabd z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uabd z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uabd z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uabd z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uabdlb z0.h, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uabdlb z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uabdlb z31.d, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uabdlt z0.h, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uabdlt z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uabdlt z31.d, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uadalp z0.h, p0/m, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uadalp z29.s, p0/m, z30.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uadalp z30.d, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uaddlb z0.h, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uaddlb z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uaddlb z31.d, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uaddlt z0.h, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uaddlt z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uaddlt z31.d, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 5.00 uaddv d0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 uaddv d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 3.00 uaddv d0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 3.00 uaddv d0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uaddwb z0.h, z1.h, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uaddwb z29.s, z30.s, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uaddwb z31.d, z31.d, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uaddwt z0.h, z1.h, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uaddwt z29.s, z30.s, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uaddwt z31.d, z31.d, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - ucvtf z0.d, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - ucvtf z0.d, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - ucvtf z0.h, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 4.00 - ucvtf z0.h, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - ucvtf z0.h, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - ucvtf z0.s, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - ucvtf z0.s, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 20.00 - udiv z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 11.00 - udiv z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 20.00 - udivr z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 11.00 - udivr z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - udot z0.d, z1.h, z15.h[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - udot z0.d, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 udot z0.s, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 udot z0.s, z1.b, z7.b[3]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uhadd z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uhadd z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uhadd z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uhadd z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uhsub z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uhsub z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uhsub z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uhsub z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uhsubr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uhsubr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uhsubr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uhsubr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 umax z0.b, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 umax z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 umax z31.b, z31.b, #255
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 umax z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 umax z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 umax z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 umaxp z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 umaxp z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 umaxp z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 umaxp z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 5.00 umaxv b0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 umaxv d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 3.00 umaxv h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 3.00 umaxv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 umin z0.b, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 umin z31.b, p7/m, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 umin z31.b, z31.b, #255
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 umin z31.d, p7/m, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 umin z31.h, p7/m, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 umin z31.s, p7/m, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uminp z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uminp z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uminp z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uminp z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 5.00 uminv b0, p7, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 1.00 uminv d0, p7, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 3.00 uminv h0, p7, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 3.00 uminv s0, p7, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umlalb z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umlalb z0.d, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umlalb z0.h, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umlalb z0.s, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umlalb z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umlalt z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umlalt z0.d, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umlalt z0.h, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umlalt z0.s, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umlalt z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umlslb z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umlslb z0.d, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umlslb z0.h, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umlslb z0.s, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umlslb z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umlslt z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umlslt z0.d, z1.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umlslt z0.h, z1.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umlslt z0.s, z1.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umlslt z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ummla z0.s, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umulh z0.b, p7/m, z0.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umulh z0.b, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - umulh z0.d, p7/m, z0.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umulh z0.h, p7/m, z0.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umulh z0.h, z1.h, z2.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umulh z0.s, p7/m, z0.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umulh z29.s, z30.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - umulh z31.d, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umullb z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umullb z0.h, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umullb z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umullb z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umullb z31.d, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umullt z0.d, z1.s, z15.s[1]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umullt z0.h, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umullt z0.s, z1.h, z7.h[7]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umullt z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 1.00 - umullt z31.d, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqadd z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqadd z0.b, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqadd z0.b, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqadd z0.d, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqadd z0.d, z0.d, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqadd z0.d, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqadd z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqadd z0.h, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqadd z0.h, z0.h, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqadd z0.h, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqadd z0.s, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqadd z0.s, z0.s, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqadd z0.s, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqadd z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqadd z31.b, z31.b, #255
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqadd z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqadd z31.d, z31.d, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqadd z31.h, z31.h, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqadd z31.s, z31.s, #65280
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdecb w0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdecb w0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdecb w0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdecb w0, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdecb x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdecb x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdecb x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdecb x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdecb x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdecd w0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdecd w0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdecd w0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdecd w0, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdecd x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdecd x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdecd x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdecd x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdecd x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqdecd z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqdecd z0.d, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqdecd z0.d, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqdecd z0.d, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdech w0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdech w0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdech w0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdech w0, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdech x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdech x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdech x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdech x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdech x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqdech z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqdech z0.h, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqdech z0.h, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqdech z0.h, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdecp wzr, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdecp wzr, p15.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdecp wzr, p15.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdecp wzr, p15.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdecp x0, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdecp x0, p0.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdecp x0, p0.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdecp x0, p0.s
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - 0.50 0.50 uqdecp z0.d, p0.d
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - 0.50 0.50 uqdecp z0.h, p0.h
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - 0.50 0.50 uqdecp z0.s, p0.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdecw w0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdecw w0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdecw w0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdecw w0, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdecw x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdecw x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdecw x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdecw x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqdecw x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqdecw z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqdecw z0.s, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqdecw z0.s, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqdecw z0.s, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqincb w0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqincb w0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqincb w0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqincb w0, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqincb x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqincb x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqincb x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqincb x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqincb x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqincd w0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqincd w0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqincd w0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqincd w0, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqincd x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqincd x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqincd x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqincd x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqincd x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqincd z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqincd z0.d, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqincd z0.d, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqincd z0.d, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqinch w0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqinch w0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqinch w0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqinch w0, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqinch x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqinch x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqinch x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqinch x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqinch x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqinch z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqinch z0.h, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqinch z0.h, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqinch z0.h, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqincp wzr, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqincp wzr, p15.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqincp wzr, p15.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqincp wzr, p15.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqincp x0, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqincp x0, p0.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqincp x0, p0.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqincp x0, p0.s
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - 0.50 0.50 uqincp z0.d, p0.d
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - 0.50 0.50 uqincp z0.h, p0.h
+# CHECK-NEXT: - - - - - - - - - - - - 1.50 0.50 - - - - - - - 0.50 0.50 uqincp z0.s, p0.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqincw w0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqincw w0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqincw w0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqincw w0, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqincw x0
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqincw x0, #14
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqincw x0, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqincw x0, pow2
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uqincw x0, vl1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqincw z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqincw z0.s, all, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqincw z0.s, pow2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqincw z0.s, pow2, mul #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqrshl z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqrshl z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqrshl z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqrshl z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqrshlr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqrshlr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqrshlr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqrshlr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqrshrnb z0.b, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqrshrnb z0.h, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqrshrnb z0.s, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqrshrnb z31.b, z31.h, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqrshrnb z31.h, z31.s, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqrshrnb z31.s, z31.d, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqrshrnt z0.b, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqrshrnt z0.h, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqrshrnt z0.s, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqrshrnt z31.b, z31.h, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqrshrnt z31.h, z31.s, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqrshrnt z31.s, z31.d, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshl z0.b, p0/m, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshl z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshl z0.d, p0/m, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshl z0.h, p0/m, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshl z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshl z0.s, p0/m, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshl z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshl z31.b, p0/m, z31.b, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshl z31.d, p0/m, z31.d, #63
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshl z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshl z31.h, p0/m, z31.h, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshl z31.s, p0/m, z31.s, #31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshlr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshlr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshlr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshlr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshrnb z0.b, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshrnb z0.h, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshrnb z0.s, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshrnb z31.b, z31.h, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshrnb z31.h, z31.s, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshrnb z31.s, z31.d, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshrnt z0.b, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshrnt z0.h, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshrnt z0.s, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshrnt z31.b, z31.h, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshrnt z31.h, z31.s, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqshrnt z31.s, z31.d, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqsub z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqsub z0.b, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqsub z0.b, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqsub z0.d, z0.d, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqsub z0.d, z0.d, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqsub z0.d, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqsub z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqsub z0.h, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqsub z0.h, z0.h, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqsub z0.h, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqsub z0.s, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqsub z0.s, z0.s, #0, lsl #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqsub z0.s, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqsub z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqsub z31.b, z31.b, #255
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqsub z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqsub z31.d, z31.d, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqsub z31.h, z31.h, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqsub z31.s, z31.s, #65280
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqsubr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqsubr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqsubr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqsubr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqxtnb z0.b, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqxtnb z0.h, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqxtnb z0.s, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqxtnt z0.b, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqxtnt z0.h, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uqxtnt z0.s, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - urecpe z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 urhadd z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 urhadd z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 urhadd z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 urhadd z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 urshl z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 urshl z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 urshl z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 urshl z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 urshlr z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 urshlr z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 urshlr z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 urshlr z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 urshr z0.b, p0/m, z0.b, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 urshr z0.d, p0/m, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 urshr z0.h, p0/m, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 urshr z0.s, p0/m, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 urshr z31.b, p0/m, z31.b, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 urshr z31.d, p0/m, z31.d, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 urshr z31.h, p0/m, z31.h, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 urshr z31.s, p0/m, z31.s, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 2.00 - ursqrte z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 ursra z0.b, z0.b, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 ursra z0.d, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 ursra z0.h, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 ursra z0.s, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 ursra z31.b, z31.b, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 ursra z31.d, z31.d, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 ursra z31.h, z31.h, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 ursra z31.s, z31.s, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ushllb z0.d, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ushllb z0.h, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ushllb z0.s, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ushllb z31.d, z31.s, #31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ushllb z31.h, z31.b, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ushllb z31.s, z31.h, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ushllt z0.d, z0.s, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ushllt z0.h, z0.b, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ushllt z0.s, z0.h, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ushllt z31.d, z31.s, #31
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ushllt z31.h, z31.b, #7
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 ushllt z31.s, z31.h, #15
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usmmla z0.s, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usqadd z0.b, p0/m, z0.b, z1.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usqadd z0.h, p0/m, z0.h, z1.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usqadd z29.s, p7/m, z29.s, z30.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usqadd z31.d, p7/m, z31.d, z30.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 usra z0.b, z0.b, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 usra z0.d, z0.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 usra z0.h, z0.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 usra z0.s, z0.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 usra z31.b, z31.b, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 usra z31.d, z31.d, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 usra z31.h, z31.h, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - 1.00 usra z31.s, z31.s, #32
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usublb z0.h, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usublb z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usublb z31.d, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usublt z0.h, z1.b, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usublt z29.s, z30.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usublt z31.d, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usubwb z0.h, z1.h, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usubwb z29.s, z30.s, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usubwb z31.d, z31.d, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usubwt z0.h, z1.h, z2.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usubwt z29.s, z30.s, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 usubwt z31.d, z31.d, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uunpkhi z31.d, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uunpkhi z31.h, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uunpkhi z31.s, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uunpklo z31.d, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uunpklo z31.h, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uunpklo z31.s, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uxtb z0.d, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uxtb z0.h, p0/m, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uxtb z0.s, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uxtb z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uxtb z31.h, p7/m, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uxtb z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uxth z0.d, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uxth z0.s, p0/m, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uxth z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uxth z31.s, p7/m, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uxtw z0.d, p0/m, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uxtw z31.d, p7/m, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uzp1 p15.b, p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uzp1 p15.d, p15.d, p15.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uzp1 p15.h, p15.h, p15.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uzp1 p15.s, p15.s, p15.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uzp1 z31.b, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uzp1 z31.d, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uzp1 z31.h, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uzp1 z31.s, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uzp2 p15.b, p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uzp2 p15.d, p15.d, p15.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uzp2 p15.h, p15.h, p15.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - uzp2 p15.s, p15.s, p15.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uzp2 z31.b, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uzp2 z31.d, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uzp2 z31.h, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 uzp2 z31.s, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - whilege p15.b, w0, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - whilege p15.b, wzr, w0
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - whilege p15.b, x0, xzr
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - whilege p15.b, xzr, x0
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - whilege p15.d, w0, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - whilege p15.d, x0, xzr
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - whilege p15.h, w0, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - whilege p15.h, x0, xzr
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - whilege p15.s, w0, wzr
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - whilege p15.s, x0, xzr
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - whilerw p15.b, x30, x30
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - whilerw p15.d, x30, x30
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - whilerw p15.h, x30, x30
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - whilerw p15.s, x30, x30
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - whilewr p15.b, x30, x30
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - whilewr p15.d, x30, x30
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - whilewr p15.h, x30, x30
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 1.00 - - - - - - - - - whilewr p15.s, x30, x30
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - wrffr p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 1.00 - - - - - - - - - - wrffr p15.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 xar z0.b, z0.b, z1.b, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 xar z0.d, z0.d, z1.d, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 xar z0.h, z0.h, z1.h, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 xar z0.s, z0.s, z1.s, #1
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 xar z31.b, z31.b, z30.b, #8
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 xar z31.d, z31.d, z30.d, #64
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 xar z31.h, z31.h, z30.h, #16
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 xar z31.s, z31.s, z30.s, #32
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - zip1 p0.b, p0.b, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - zip1 p0.d, p0.d, p0.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - zip1 p0.h, p0.h, p0.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - zip1 p0.s, p0.s, p0.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - zip1 p15.b, p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - zip1 p15.d, p15.d, p15.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - zip1 p15.h, p15.h, p15.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - zip1 p15.s, p15.s, p15.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 zip1 z0.b, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 zip1 z0.d, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 zip1 z0.h, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 zip1 z0.s, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 zip1 z31.b, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 zip1 z31.d, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 zip1 z31.h, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 zip1 z31.s, z31.s, z31.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - zip2 p0.b, p0.b, p0.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - zip2 p0.d, p0.d, p0.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - zip2 p0.h, p0.h, p0.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - zip2 p0.s, p0.s, p0.s
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - zip2 p15.b, p15.b, p15.b
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - zip2 p15.d, p15.d, p15.d
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - zip2 p15.h, p15.h, p15.h
+# CHECK-NEXT: - - - - - - - - - - - - 0.50 0.50 - - - - - - - - - zip2 p15.s, p15.s, p15.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 zip2 z0.b, z0.b, z0.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 zip2 z0.d, z0.d, z0.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 zip2 z0.h, z0.h, z0.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 zip2 z0.s, z0.s, z0.s
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 zip2 z31.b, z31.b, z31.b
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 zip2 z31.d, z31.d, z31.d
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 zip2 z31.h, z31.h, z31.h
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - 0.50 0.50 zip2 z31.s, z31.s, z31.s
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-writeback.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-writeback.s
new file mode 100644
index 0000000..b165bdd
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-writeback.s
@@ -0,0 +1,3979 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=neoverse-v3ae --instruction-info=0 --resource-pressure=0 --timeline --timeline-max-iterations=1 < %s | FileCheck %s
+
+# LLVM-MCA-BEGIN G01
+ld1 { v1.1d }, [x27], #8
+ld1 { v1.2d }, [x27], #16
+ld1 { v1.2s }, [x27], #8
+ld1 { v1.4h }, [x27], #8
+ld1 { v1.4s }, [x27], #16
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G02
+ld1 { v1.8b }, [x27], #8
+ld1 { v1.8h }, [x27], #16
+ld1 { v1.16b }, [x27], #16
+ld1 { v1.1d }, [x27], x28
+ld1 { v1.2d }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G03
+ld1 { v1.2s }, [x27], x28
+ld1 { v1.4h }, [x27], x28
+ld1 { v1.4s }, [x27], x28
+ld1 { v1.8b }, [x27], x28
+ld1 { v1.8h }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G04
+ld1 { v1.16b }, [x27], x28
+ld1 { v1.1d, v2.1d }, [x27], #16
+ld1 { v1.2d, v2.2d }, [x27], #32
+ld1 { v1.2s, v2.2s }, [x27], #16
+ld1 { v1.4h, v2.4h }, [x27], #16
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G05
+ld1 { v1.4s, v2.4s }, [x27], #32
+ld1 { v1.8b, v2.8b }, [x27], #16
+ld1 { v1.8h, v2.8h }, [x27], #32
+ld1 { v1.16b, v2.16b }, [x27], #32
+ld1 { v1.1d, v2.1d }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G06
+ld1 { v1.2d, v2.2d }, [x27], x28
+ld1 { v1.2s, v2.2s }, [x27], x28
+ld1 { v1.4h, v2.4h }, [x27], x28
+ld1 { v1.4s, v2.4s }, [x27], x28
+ld1 { v1.8b, v2.8b }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G07
+ld1 { v1.8h, v2.8h }, [x27], x28
+ld1 { v1.16b, v2.16b }, [x27], x28
+ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G08
+ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G09
+ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G10
+ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G11
+ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G12
+ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G13
+ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+ld1 { v1.b }[0], [x27], #1
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G14
+ld1 { v1.b }[8], [x27], #1
+ld1 { v1.b }[0], [x27], x28
+ld1 { v1.b }[8], [x27], x28
+ld1 { v1.h }[0], [x27], #2
+ld1 { v1.h }[4], [x27], #2
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G15
+ld1 { v1.h }[0], [x27], x28
+ld1 { v1.h }[4], [x27], x28
+ld1 { v1.s }[0], [x27], #4
+ld1 { v1.s }[0], [x27], x28
+ld1 { v1.d }[0], [x27], #8
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G16
+ld1 { v1.d }[0], [x27], x28
+ld1r { v1.1d }, [x27], #8
+ld1r { v1.2d }, [x27], #8
+ld1r { v1.2s }, [x27], #4
+ld1r { v1.4h }, [x27], #2
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G17
+ld1r { v1.4s }, [x27], #4
+ld1r { v1.8b }, [x27], #1
+ld1r { v1.8h }, [x27], #2
+ld1r { v1.16b }, [x27], #1
+ld1r { v1.1d }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G18
+ld1r { v1.2d }, [x27], x28
+ld1r { v1.2s }, [x27], x28
+ld1r { v1.4h }, [x27], x28
+ld1r { v1.4s }, [x27], x28
+ld1r { v1.8b }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G19
+ld1r { v1.8h }, [x27], x28
+ld1r { v1.16b }, [x27], x28
+ld2 { v1.2d, v2.2d }, [x27], #32
+ld2 { v1.2s, v2.2s }, [x27], #16
+ld2 { v1.4h, v2.4h }, [x27], #16
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G20
+ld2 { v1.4s, v2.4s }, [x27], #32
+ld2 { v1.8b, v2.8b }, [x27], #16
+ld2 { v1.8h, v2.8h }, [x27], #32
+ld2 { v1.16b, v2.16b }, [x27], #32
+ld2 { v1.2d, v2.2d }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G21
+ld2 { v1.2s, v2.2s }, [x27], x28
+ld2 { v1.4h, v2.4h }, [x27], x28
+ld2 { v1.4s, v2.4s }, [x27], x28
+ld2 { v1.8b, v2.8b }, [x27], x28
+ld2 { v1.8h, v2.8h }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G22
+ld2 { v1.16b, v2.16b }, [x27], x28
+ld2 { v1.b, v2.b }[0], [x27], #2
+ld2 { v1.b, v2.b }[8], [x27], #2
+ld2 { v1.b, v2.b }[0], [x27], x28
+ld2 { v1.b, v2.b }[8], [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G23
+ld2 { v1.h, v2.h }[0], [x27], #4
+ld2 { v1.h, v2.h }[4], [x27], #4
+ld2 { v1.h, v2.h }[0], [x27], x28
+ld2 { v1.h, v2.h }[4], [x27], x28
+ld2 { v1.s, v2.s }[0], [x27], #8
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G24
+ld2 { v1.s, v2.s }[0], [x27], x28
+ld2 { v1.d, v2.d }[0], [x27], #16
+ld2 { v1.d, v2.d }[0], [x27], x28
+ld2r { v1.1d, v2.1d }, [x27], #16
+ld2r { v1.2d, v2.2d }, [x27], #16
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G25
+ld2r { v1.2s, v2.2s }, [x27], #8
+ld2r { v1.4h, v2.4h }, [x27], #4
+ld2r { v1.4s, v2.4s }, [x27], #8
+ld2r { v1.8b, v2.8b }, [x27], #2
+ld2r { v1.8h, v2.8h }, [x27], #4
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G26
+ld2r { v1.16b, v2.16b }, [x27], #2
+ld2r { v1.1d, v2.1d }, [x27], x28
+ld2r { v1.2d, v2.2d }, [x27], x28
+ld2r { v1.2s, v2.2s }, [x27], x28
+ld2r { v1.4h, v2.4h }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G27
+ld2r { v1.4s, v2.4s }, [x27], x28
+ld2r { v1.8b, v2.8b }, [x27], x28
+ld2r { v1.8h, v2.8h }, [x27], x28
+ld2r { v1.16b, v2.16b }, [x27], x28
+ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G28
+ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G29
+ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G30
+ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+ld3 { v1.b, v2.b, v3.b }[0], [x27], #3
+ld3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G31
+ld3 { v1.b, v2.b, v3.b }[0], [x27], x28
+ld3 { v1.b, v2.b, v3.b }[8], [x27], x28
+ld3 { v1.h, v2.h, v3.h }[0], [x27], #6
+ld3 { v1.h, v2.h, v3.h }[4], [x27], #6
+ld3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G32
+ld3 { v1.h, v2.h, v3.h }[4], [x27], x28
+ld3 { v1.s, v2.s, v3.s }[0], [x27], #12
+ld3 { v1.s, v2.s, v3.s }[0], [x27], x28
+ld3 { v1.d, v2.d, v3.d }[0], [x27], #24
+ld3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G33
+ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24
+ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24
+ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12
+ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6
+ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G34
+ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3
+ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6
+ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3
+ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28
+ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G35
+ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28
+ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28
+ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28
+ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28
+ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G36
+ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28
+ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G37
+ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G38
+ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G39
+ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G40
+ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G41
+ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
+ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G42
+ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
+ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
+ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
+ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
+ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G43
+ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G44
+ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+ldp s1, s2, [x27], #248
+ldp d1, d2, [x27], #496
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G45
+ldp q1, q2, [x27], #992
+ldp s1, s2, [x27, #248]!
+ldp d1, d2, [x27, #496]!
+ldp q1, q2, [x27, #992]!
+ldp w1, w2, [x27], #248
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G46
+ldp x1, x2, [x27], #496
+ldp w1, w2, [x27, #248]!
+ldp x1, x2, [x27, #496]!
+ldpsw x1, x2, [x27], #248
+ldpsw x1, x2, [x27, #248]!
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G47
+ldr b1, [x27], #254
+ldr h1, [x27], #254
+ldr s1, [x27], #254
+ldr d1, [x27], #254
+ldr q1, [x27], #254
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G48
+ldr b1, [x27, #254]!
+ldr h1, [x27, #254]!
+ldr s1, [x27, #254]!
+ldr d1, [x27, #254]!
+ldr q1, [x27, #254]!
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G49
+ldr w1, [x27], #254
+ldr x1, [x27], #254
+ldr w1, [x27, #254]!
+ldr x1, [x27, #254]!
+ldrb w1, [x27], #254
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G50
+ldrb w1, [x27, #254]!
+ldrh w1, [x27], #254
+ldrh w1, [x27, #254]!
+ldrsb w1, [x27], #254
+ldrsb x1, [x27], #254
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G51
+ldrsb w1, [x27, #254]!
+ldrsb x1, [x27, #254]!
+ldrsh w1, [x27], #254
+ldrsh x1, [x27], #254
+ldrsh w1, [x27, #254]!
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G52
+ldrsh x1, [x27, #254]!
+ldrsw x1, [x27], #254
+ldrsw x1, [x27, #254]!
+st1 { v1.1d }, [x27], #8
+st1 { v1.2d }, [x27], #16
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G53
+st1 { v1.2s }, [x27], #8
+st1 { v1.4h }, [x27], #8
+st1 { v1.4s }, [x27], #16
+st1 { v1.8b }, [x27], #8
+st1 { v1.8h }, [x27], #16
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G54
+st1 { v1.16b }, [x27], #16
+st1 { v1.1d }, [x27], x28
+st1 { v1.2d }, [x27], x28
+st1 { v1.2s }, [x27], x28
+st1 { v1.4h }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G55
+st1 { v1.4s }, [x27], x28
+st1 { v1.8b }, [x27], x28
+st1 { v1.8h }, [x27], x28
+st1 { v1.16b }, [x27], x28
+st1 { v1.1d, v2.1d }, [x27], #16
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G56
+st1 { v1.2d, v2.2d }, [x27], #32
+st1 { v1.2s, v2.2s }, [x27], #16
+st1 { v1.4h, v2.4h }, [x27], #16
+st1 { v1.4s, v2.4s }, [x27], #32
+st1 { v1.8b, v2.8b }, [x27], #16
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G57
+st1 { v1.8h, v2.8h }, [x27], #32
+st1 { v1.16b, v2.16b }, [x27], #32
+st1 { v1.1d, v2.1d }, [x27], x28
+st1 { v1.2d, v2.2d }, [x27], x28
+st1 { v1.2s, v2.2s }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G58
+st1 { v1.4h, v2.4h }, [x27], x28
+st1 { v1.4s, v2.4s }, [x27], x28
+st1 { v1.8b, v2.8b }, [x27], x28
+st1 { v1.8h, v2.8h }, [x27], x28
+st1 { v1.16b, v2.16b }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G59
+st1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+st1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+st1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+st1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+st1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G60
+st1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+st1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+st1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+st1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+st1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G61
+st1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+st1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+st1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+st1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+st1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G62
+st1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G63
+st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G64
+st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G65
+st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+st1 { v1.b }[0], [x27], #1
+st1 { v1.b }[8], [x27], #1
+st1 { v1.b }[0], [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G66
+st1 { v1.b }[8], [x27], x28
+st1 { v1.h }[0], [x27], #2
+st1 { v1.h }[4], [x27], #2
+st1 { v1.h }[0], [x27], x28
+st1 { v1.h }[4], [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G67
+st1 { v1.s }[0], [x27], #4
+st1 { v1.s }[0], [x27], x28
+st1 { v1.d }[0], [x27], #8
+st1 { v1.d }[0], [x27], x28
+st2 { v1.2d, v2.2d }, [x27], #32
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G68
+st2 { v1.2s, v2.2s }, [x27], #16
+st2 { v1.4h, v2.4h }, [x27], #16
+st2 { v1.4s, v2.4s }, [x27], #32
+st2 { v1.8b, v2.8b }, [x27], #16
+st2 { v1.8h, v2.8h }, [x27], #32
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G69
+st2 { v1.16b, v2.16b }, [x27], #32
+st2 { v1.2d, v2.2d }, [x27], x28
+st2 { v1.2s, v2.2s }, [x27], x28
+st2 { v1.4h, v2.4h }, [x27], x28
+st2 { v1.4s, v2.4s }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G70
+st2 { v1.8b, v2.8b }, [x27], x28
+st2 { v1.8h, v2.8h }, [x27], x28
+st2 { v1.16b, v2.16b }, [x27], x28
+st2 { v1.b, v2.b }[0], [x27], #2
+st2 { v1.b, v2.b }[8], [x27], #2
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G71
+st2 { v1.b, v2.b }[0], [x27], x28
+st2 { v1.b, v2.b }[8], [x27], x28
+st2 { v1.h, v2.h }[0], [x27], #4
+st2 { v1.h, v2.h }[4], [x27], #4
+st2 { v1.h, v2.h }[0], [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G72
+st2 { v1.h, v2.h }[4], [x27], x28
+st2 { v1.s, v2.s }[0], [x27], #8
+st2 { v1.s, v2.s }[0], [x27], x28
+st2 { v1.d, v2.d }[0], [x27], #16
+st2 { v1.d, v2.d }[0], [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G73
+st2g x26, [x27], #4064
+st2g x26, [x27, #4064]!
+st3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+st3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+st3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G74
+st3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+st3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+st3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+st3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+st3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G75
+st3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+st3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+st3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+st3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+st3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G76
+st3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+st3 { v1.b, v2.b, v3.b }[0], [x27], #3
+st3 { v1.b, v2.b, v3.b }[8], [x27], #3
+st3 { v1.b, v2.b, v3.b }[0], [x27], x28
+st3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G77
+st3 { v1.h, v2.h, v3.h }[0], [x27], #6
+st3 { v1.h, v2.h, v3.h }[4], [x27], #6
+st3 { v1.h, v2.h, v3.h }[0], [x27], x28
+st3 { v1.h, v2.h, v3.h }[4], [x27], x28
+st3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G78
+st3 { v1.s, v2.s, v3.s }[0], [x27], x28
+st3 { v1.d, v2.d, v3.d }[0], [x27], #24
+st3 { v1.d, v2.d, v3.d }[0], [x27], x28
+st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G79
+st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G80
+st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G81
+st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G82
+st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G83
+st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+stg x26, [x27], #4064
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G84
+stg x26, [x27, #4064]!
+stgp x1, x2, [x27], #992
+stgp x1, x2, [x27, #992]!
+stp s1, s2, [x27], #248
+stp d1, d2, [x27], #496
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G85
+stp q1, q2, [x27], #992
+stp s1, s2, [x27, #248]!
+stp d1, d2, [x27, #496]!
+stp q1, q2, [x27, #992]!
+stp w1, w2, [x27], #248
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G86
+stp x1, x2, [x27], #496
+stp w1, w2, [x27, #248]!
+stp x1, x2, [x27, #496]!
+str b1, [x27], #254
+str h1, [x27], #254
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G87
+str s1, [x27], #254
+str d1, [x27], #254
+str q1, [x27], #254
+str b1, [x27, #254]!
+str h1, [x27, #254]!
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G88
+str s1, [x27, #254]!
+str d1, [x27, #254]!
+str q1, [x27, #254]!
+str w1, [x27], #254
+str x1, [x27], #254
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G89
+str w1, [x27, #254]!
+str x1, [x27, #254]!
+strb w1, [x27], #254
+strb w1, [x27, #254]!
+strh w1, [x27], #254
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G90
+strh w1, [x27, #254]!
+stz2g x26, [x27], #4064
+stz2g x26, [x27, #4064]!
+stzg x26, [x27], #4064
+stzg x26, [x27, #4064]!
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN G91
+ldr x1, [x27], #254
+ldr x2, [x1], #254
+# LLVM-MCA-END
+
+# CHECK: [0] Code Region - G01
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 508
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.97
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 1.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . ld1 { v1.1d }, [x27], #8
+# CHECK-NEXT: [0,1] D=eeeeeeER. . ld1 { v1.2d }, [x27], #16
+# CHECK-NEXT: [0,2] D==eeeeeeER . ld1 { v1.2s }, [x27], #8
+# CHECK-NEXT: [0,3] D===eeeeeeER. ld1 { v1.4h }, [x27], #8
+# CHECK-NEXT: [0,4] D====eeeeeeER ld1 { v1.4s }, [x27], #16
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.1d }, [x27], #8
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.2d }, [x27], #16
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1 { v1.2s }, [x27], #8
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ld1 { v1.4h }, [x27], #8
+# CHECK-NEXT: 4. 1 5.0 0.0 0.0 ld1 { v1.4s }, [x27], #16
+# CHECK-NEXT: 1 3.0 0.2 0.0 <total>
+
+# CHECK: [1] Code Region - G02
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 508
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.97
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 1.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . ld1 { v1.8b }, [x27], #8
+# CHECK-NEXT: [0,1] D=eeeeeeER. . ld1 { v1.8h }, [x27], #16
+# CHECK-NEXT: [0,2] D==eeeeeeER . ld1 { v1.16b }, [x27], #16
+# CHECK-NEXT: [0,3] D===eeeeeeER. ld1 { v1.1d }, [x27], x28
+# CHECK-NEXT: [0,4] D====eeeeeeER ld1 { v1.2d }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8b }, [x27], #8
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.8h }, [x27], #16
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1 { v1.16b }, [x27], #16
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ld1 { v1.1d }, [x27], x28
+# CHECK-NEXT: 4. 1 5.0 0.0 0.0 ld1 { v1.2d }, [x27], x28
+# CHECK-NEXT: 1 3.0 0.2 0.0 <total>
+
+# CHECK: [2] Code Region - G03
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 508
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.97
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 1.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . ld1 { v1.2s }, [x27], x28
+# CHECK-NEXT: [0,1] D=eeeeeeER. . ld1 { v1.4h }, [x27], x28
+# CHECK-NEXT: [0,2] D==eeeeeeER . ld1 { v1.4s }, [x27], x28
+# CHECK-NEXT: [0,3] D===eeeeeeER. ld1 { v1.8b }, [x27], x28
+# CHECK-NEXT: [0,4] D====eeeeeeER ld1 { v1.8h }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.4h }, [x27], x28
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1 { v1.4s }, [x27], x28
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ld1 { v1.8b }, [x27], x28
+# CHECK-NEXT: 4. 1 5.0 0.0 0.0 ld1 { v1.8h }, [x27], x28
+# CHECK-NEXT: 1 3.0 0.2 0.0 <total>
+
+# CHECK: [3] Code Region - G04
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 508
+# CHECK-NEXT: Total uOps: 1400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.76
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 3.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . ld1 { v1.16b }, [x27], x28
+# CHECK-NEXT: [0,1] D=eeeeeeER. . ld1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: [0,2] D==eeeeeeER . ld1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,3] .D==eeeeeeER. ld1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,4] .D===eeeeeeER ld1 { v1.4h, v2.4h }, [x27], #16
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: 1 2.6 0.2 0.0 <total>
+
+# CHECK: [4] Code Region - G05
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 508
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.95
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 3.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . ld1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,1] D=eeeeeeER. . ld1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,2] D==eeeeeeER . ld1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,3] .D==eeeeeeER. ld1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,4] .D===eeeeeeER ld1 { v1.1d, v2.1d }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: 1 2.6 0.2 0.0 <total>
+
+# CHECK: [5] Code Region - G06
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 508
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.95
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 3.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . ld1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,1] D=eeeeeeER. . ld1 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,2] D==eeeeeeER . ld1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,3] .D==eeeeeeER. ld1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,4] .D===eeeeeeER ld1 { v1.8b, v2.8b }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 1 2.6 0.2 0.0 <total>
+
+# CHECK: [6] Code Region - G07
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 508
+# CHECK-NEXT: Total uOps: 1800
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.54
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 4.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . ld1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,1] D=eeeeeeER. . ld1 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,2] D==eeeeeeER . ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: [0,3] .D==eeeeeeER. ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,4] .D===eeeeeeER ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: 1 2.6 0.2 0.0 <total>
+
+# CHECK: [7] Code Region - G08
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 508
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.94
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,1] D=eeeeeeER. . ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,2] .D=eeeeeeER . ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,3] .D==eeeeeeER. ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,4] . D==eeeeeeER ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 1 2.2 0.2 0.0 <total>
+
+# CHECK: [8] Code Region - G09
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 508
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.94
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: [0,1] D=eeeeeeER. . ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,2] .D=eeeeeeER . ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,3] .D==eeeeeeER. ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,4] . D==eeeeeeER ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 1 2.2 0.2 0.0 <total>
+
+# CHECK: [9] Code Region - G10
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 608
+# CHECK-NEXT: Total uOps: 2200
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.62
+# CHECK-NEXT: IPC: 0.82
+# CHECK-NEXT: Block RThroughput: 5.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,1] D=eeeeeeER. . ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,2] .D=eeeeeeER . ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,3] .D==eeeeeeeER. ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: [0,4] . D==eeeeeeeER ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 1 2.2 0.2 0.0 <total>
+
+# CHECK: [10] Code Region - G11
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 675
+# CHECK-NEXT: Total uOps: 2500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.70
+# CHECK-NEXT: IPC: 0.74
+# CHECK-NEXT: Block RThroughput: 6.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,1] D=eeeeeeeER . ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,2] .D=eeeeeeeER . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,3] .D===eeeeeeeER. ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,4] . D===eeeeeeeER ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: 3. 1 4.0 1.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 1 2.6 0.4 0.0 <total>
+
+# CHECK: [11] Code Region - G12
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 675
+# CHECK-NEXT: Total uOps: 2500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.70
+# CHECK-NEXT: IPC: 0.74
+# CHECK-NEXT: Block RThroughput: 6.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeER. . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,1] D=eeeeeeeER . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: [0,2] .D=eeeeeeeER . ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,3] .D===eeeeeeeER. ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,4] . D===eeeeeeeER ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 3. 1 4.0 1.0 0.0 ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 1 2.6 0.4 0.0 <total>
+
+# CHECK: [12] Code Region - G13
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 1210
+# CHECK-NEXT: Total uOps: 2300
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.90
+# CHECK-NEXT: IPC: 0.41
+# CHECK-NEXT: Block RThroughput: 5.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 01
+
+# CHECK: [0,0] DeeeeeeeER. . .. ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,1] D=eeeeeeeER . .. ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,2] .D=eeeeeeeER . .. ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,3] .D===eeeeeeeER . .. ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,4] . D=========eeeeeeeeER ld1 { v1.b }[0], [x27], #1
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 3. 1 4.0 1.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 4. 1 10.0 0.0 0.0 ld1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: 1 3.8 0.4 0.0 <total>
+
+# CHECK: [13] Code Region - G14
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 4003
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.37
+# CHECK-NEXT: IPC: 0.12
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: [0,1] D========eeeeeeeeER . . . . . . ld1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: [0,2] D================eeeeeeeeER . . . . ld1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: [0,3] .D=======================eeeeeeeeER. . . ld1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: [0,4] .D===============================eeeeeeeeER ld1 { v1.h }[4], [x27], #2
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 ld1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: 2. 1 17.0 0.0 0.0 ld1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: 3. 1 24.0 0.0 0.0 ld1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: 4. 1 32.0 0.0 0.0 ld1 { v1.h }[4], [x27], #2
+# CHECK-NEXT: 1 16.6 0.2 0.0 <total>
+
+# CHECK: [14] Code Region - G15
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 4003
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.37
+# CHECK-NEXT: IPC: 0.12
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: [0,1] D========eeeeeeeeER . . . . . . ld1 { v1.h }[4], [x27], x28
+# CHECK-NEXT: [0,2] D================eeeeeeeeER . . . . ld1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: [0,3] .D=======================eeeeeeeeER. . . ld1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: [0,4] .D===============================eeeeeeeeER ld1 { v1.d }[0], [x27], #8
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 ld1 { v1.h }[4], [x27], x28
+# CHECK-NEXT: 2. 1 17.0 0.0 0.0 ld1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: 3. 1 24.0 0.0 0.0 ld1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: 4. 1 32.0 0.0 0.0 ld1 { v1.d }[0], [x27], #8
+# CHECK-NEXT: 1 16.6 0.2 0.0 <total>
+
+# CHECK: [15] Code Region - G16
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 1203
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.25
+# CHECK-NEXT: IPC: 0.42
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . ld1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld1r { v1.1d }, [x27], #8
+# CHECK-NEXT: [0,2] D==eeeeeeeeER . ld1r { v1.2d }, [x27], #8
+# CHECK-NEXT: [0,3] .D==eeeeeeeeER. ld1r { v1.2s }, [x27], #4
+# CHECK-NEXT: [0,4] .D===eeeeeeeeER ld1r { v1.4h }, [x27], #2
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1r { v1.1d }, [x27], #8
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1r { v1.2d }, [x27], #8
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld1r { v1.2s }, [x27], #4
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ld1r { v1.4h }, [x27], #2
+# CHECK-NEXT: 1 2.6 0.2 0.0 <total>
+
+# CHECK: [16] Code Region - G17
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 510
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.94
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . ld1r { v1.4s }, [x27], #4
+# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld1r { v1.8b }, [x27], #1
+# CHECK-NEXT: [0,2] D==eeeeeeeeER . ld1r { v1.8h }, [x27], #2
+# CHECK-NEXT: [0,3] .D==eeeeeeeeER. ld1r { v1.16b }, [x27], #1
+# CHECK-NEXT: [0,4] .D===eeeeeeeeER ld1r { v1.1d }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1r { v1.4s }, [x27], #4
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1r { v1.8b }, [x27], #1
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1r { v1.8h }, [x27], #2
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld1r { v1.16b }, [x27], #1
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ld1r { v1.1d }, [x27], x28
+# CHECK-NEXT: 1 2.6 0.2 0.0 <total>
+
+# CHECK: [17] Code Region - G18
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 510
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.94
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . ld1r { v1.2d }, [x27], x28
+# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld1r { v1.2s }, [x27], x28
+# CHECK-NEXT: [0,2] D==eeeeeeeeER . ld1r { v1.4h }, [x27], x28
+# CHECK-NEXT: [0,3] .D==eeeeeeeeER. ld1r { v1.4s }, [x27], x28
+# CHECK-NEXT: [0,4] .D===eeeeeeeeER ld1r { v1.8b }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1r { v1.2d }, [x27], x28
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1r { v1.2s }, [x27], x28
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1r { v1.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld1r { v1.4s }, [x27], x28
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ld1r { v1.8b }, [x27], x28
+# CHECK-NEXT: 1 2.6 0.2 0.0 <total>
+
+# CHECK: [18] Code Region - G19
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 510
+# CHECK-NEXT: Total uOps: 1900
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.73
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . ld1r { v1.8h }, [x27], x28
+# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld1r { v1.16b }, [x27], x28
+# CHECK-NEXT: [0,2] .D=eeeeeeeeER . ld2 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,3] .D==eeeeeeeeER. ld2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,4] . D==eeeeeeeeER ld2 { v1.4h, v2.4h }, [x27], #16
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1r { v1.8h }, [x27], x28
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1r { v1.16b }, [x27], x28
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: 1 2.2 0.2 0.0 <total>
+
+# CHECK: [19] Code Region - G20
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 510
+# CHECK-NEXT: Total uOps: 2400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.71
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . ld2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,2] .D=eeeeeeeeER . ld2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,3] .D==eeeeeeeeER. ld2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,4] . D==eeeeeeeeER ld2 { v1.2d, v2.2d }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 1 2.2 0.2 0.0 <total>
+
+# CHECK: [20] Code Region - G21
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 510
+# CHECK-NEXT: Total uOps: 2200
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.31
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . ld2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,2] .D=eeeeeeeeER . ld2 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,3] .D==eeeeeeeeER. ld2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,4] . D==eeeeeeeeER ld2 { v1.8h, v2.8h }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld2 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 1 2.2 0.2 0.0 <total>
+
+# CHECK: [21] Code Region - G22
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 3310
+# CHECK-NEXT: Total uOps: 2100
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.63
+# CHECK-NEXT: IPC: 0.15
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,1] D========eeeeeeeeER . . . . . . ld2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: [0,2] .D===============eeeeeeeeER . . . . ld2 { v1.b, v2.b }[8], [x27], #2
+# CHECK-NEXT: [0,3] .D=======================eeeeeeeeER. . . ld2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: [0,4] . D==============================eeeeeeeeER ld2 { v1.b, v2.b }[8], [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: 2. 1 16.0 0.0 0.0 ld2 { v1.b, v2.b }[8], [x27], #2
+# CHECK-NEXT: 3. 1 24.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: 4. 1 31.0 0.0 0.0 ld2 { v1.b, v2.b }[8], [x27], x28
+# CHECK-NEXT: 1 16.2 0.2 0.0 <total>
+
+# CHECK: [22] Code Region - G23
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 4003
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.12
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: [0,1] D========eeeeeeeeER . . . . . . ld2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: [0,2] .D===============eeeeeeeeER . . . . ld2 { v1.h, v2.h }[0], [x27], x28
+# CHECK-NEXT: [0,3] .D=======================eeeeeeeeER. . . ld2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: [0,4] . D==============================eeeeeeeeER ld2 { v1.s, v2.s }[0], [x27], #8
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 ld2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: 2. 1 16.0 0.0 0.0 ld2 { v1.h, v2.h }[0], [x27], x28
+# CHECK-NEXT: 3. 1 24.0 0.0 0.0 ld2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: 4. 1 31.0 0.0 0.0 ld2 { v1.s, v2.s }[0], [x27], #8
+# CHECK-NEXT: 1 16.2 0.2 0.0 <total>
+
+# CHECK: [23] Code Region - G24
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 2603
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.77
+# CHECK-NEXT: IPC: 0.19
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012345678
+
+# CHECK: [0,0] DeeeeeeeeER . . . . ld2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: [0,1] D========eeeeeeeeER . . . ld2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: [0,2] .D===============eeeeeeeeER . ld2 { v1.d, v2.d }[0], [x27], x28
+# CHECK-NEXT: [0,3] .D================eeeeeeeeER. ld2r { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: [0,4] . D================eeeeeeeeER ld2r { v1.2d, v2.2d }, [x27], #16
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: 1. 1 9.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: 2. 1 16.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], x28
+# CHECK-NEXT: 3. 1 17.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], #16
+# CHECK-NEXT: 1 12.0 0.2 0.0 <total>
+
+# CHECK: [24] Code Region - G25
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 510
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.92
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . ld2r { v1.2s, v2.2s }, [x27], #8
+# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld2r { v1.4h, v2.4h }, [x27], #4
+# CHECK-NEXT: [0,2] .D=eeeeeeeeER . ld2r { v1.4s, v2.4s }, [x27], #8
+# CHECK-NEXT: [0,3] .D==eeeeeeeeER. ld2r { v1.8b, v2.8b }, [x27], #2
+# CHECK-NEXT: [0,4] . D==eeeeeeeeER ld2r { v1.8h, v2.8h }, [x27], #4
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.2s, v2.2s }, [x27], #8
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], #4
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld2r { v1.4s, v2.4s }, [x27], #8
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], #2
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], #4
+# CHECK-NEXT: 1 2.2 0.2 0.0 <total>
+
+# CHECK: [25] Code Region - G26
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 510
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.92
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . ld2r { v1.16b, v2.16b }, [x27], #2
+# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld2r { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: [0,2] .D=eeeeeeeeER . ld2r { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,3] .D==eeeeeeeeER. ld2r { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,4] . D==eeeeeeeeER ld2r { v1.4h, v2.4h }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.16b, v2.16b }, [x27], #2
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld2r { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 1 2.2 0.2 0.0 <total>
+
+# CHECK: [26] Code Region - G27
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 609
+# CHECK-NEXT: Total uOps: 2300
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.78
+# CHECK-NEXT: IPC: 0.82
+# CHECK-NEXT: Block RThroughput: 5.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . ld2r { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld2r { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,2] .D=eeeeeeeeER . ld2r { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,3] .D==eeeeeeeeER. ld2r { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,4] . D==eeeeeeeeER ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld2r { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: 1 2.2 0.2 0.0 <total>
+
+# CHECK: [27] Code Region - G28
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 759
+# CHECK-NEXT: Total uOps: 3200
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.22
+# CHECK-NEXT: IPC: 0.66
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER .. ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,1] .DeeeeeeeeER .. ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,2] . D=eeeeeeeeER .. ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,3] . D=eeeeeeeeER.. ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,4] . D==eeeeeeeeER ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: 2. 1 2.0 1.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: 3. 1 2.0 0.0 0.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: 4. 1 3.0 1.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 1 1.8 0.6 0.0 <total>
+
+# CHECK: [28] Code Region - G29
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 759
+# CHECK-NEXT: Total uOps: 3300
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.35
+# CHECK-NEXT: IPC: 0.66
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER .. ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,1] .DeeeeeeeeER .. ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: [0,2] . D=eeeeeeeeER .. ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,3] . D=eeeeeeeeER.. ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,4] . D==eeeeeeeeER ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 2. 1 2.0 1.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 3. 1 2.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 4. 1 3.0 1.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 1 1.8 0.6 0.0 <total>
+
+# CHECK: [29] Code Region - G30
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 2010
+# CHECK-NEXT: Total uOps: 3200
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.59
+# CHECK-NEXT: IPC: 0.25
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . . . . ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,1] .DeeeeeeeeER . . . . ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: [0,2] . D=eeeeeeeeER . . . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,3] . D========eeeeeeeeER . . ld3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: [0,4] . D===============eeeeeeeeER ld3 { v1.b, v2.b, v3.b }[8], [x27], #3
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 2. 1 2.0 1.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 3. 1 9.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: 4. 1 16.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: 1 5.8 0.4 0.0 <total>
+
+# CHECK: [30] Code Region - G31
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 4003
+# CHECK-NEXT: Total uOps: 3000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.12
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: [0,1] .D=======eeeeeeeeER . . . . . . ld3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: [0,2] . D==============eeeeeeeeER . . . . ld3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: [0,3] . D=====================eeeeeeeeER. . . ld3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: [0,4] . D============================eeeeeeeeER ld3 { v1.h, v2.h, v3.h }[0], [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: 2. 1 15.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: 3. 1 22.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: 4. 1 29.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: 1 15.0 0.2 0.0 <total>
+
+# CHECK: [31] Code Region - G32
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 4003
+# CHECK-NEXT: Total uOps: 3000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 0.75
+# CHECK-NEXT: IPC: 0.12
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: [0,1] .D=======eeeeeeeeER . . . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: [0,2] . D==============eeeeeeeeER . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: [0,3] . D=====================eeeeeeeeER. . . ld3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: [0,4] . D============================eeeeeeeeER ld3 { v1.d, v2.d, v3.d }[0], [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: 2. 1 15.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: 3. 1 22.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: 4. 1 29.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: 1 15.0 0.2 0.0 <total>
+
+# CHECK: [32] Code Region - G33
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 759
+# CHECK-NEXT: Total uOps: 3200
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.22
+# CHECK-NEXT: IPC: 0.66
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER .. ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: [0,1] .DeeeeeeeeER .. ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24
+# CHECK-NEXT: [0,2] . D=eeeeeeeeER .. ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12
+# CHECK-NEXT: [0,3] . D=eeeeeeeeER.. ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6
+# CHECK-NEXT: [0,4] . D==eeeeeeeeER ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24
+# CHECK-NEXT: 2. 1 2.0 1.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12
+# CHECK-NEXT: 3. 1 2.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6
+# CHECK-NEXT: 4. 1 3.0 1.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12
+# CHECK-NEXT: 1 1.8 0.6 0.0 <total>
+
+# CHECK: [33] Code Region - G34
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 759
+# CHECK-NEXT: Total uOps: 3300
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.35
+# CHECK-NEXT: IPC: 0.66
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER .. ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3
+# CHECK-NEXT: [0,1] .DeeeeeeeeER .. ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6
+# CHECK-NEXT: [0,2] . D=eeeeeeeeER .. ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3
+# CHECK-NEXT: [0,3] . D=eeeeeeeeER.. ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: [0,4] . D==eeeeeeeeER ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6
+# CHECK-NEXT: 2. 1 2.0 1.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3
+# CHECK-NEXT: 3. 1 2.0 0.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: 4. 1 3.0 1.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 1 1.8 0.6 0.0 <total>
+
+# CHECK: [34] Code Region - G35
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 759
+# CHECK-NEXT: Total uOps: 3200
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.22
+# CHECK-NEXT: IPC: 0.66
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER .. ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,1] .DeeeeeeeeER .. ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,2] . D=eeeeeeeeER .. ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,3] . D=eeeeeeeeER.. ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,4] . D==eeeeeeeeER ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 2. 1 2.0 1.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 3. 1 2.0 0.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 4. 1 3.0 1.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 1 1.8 0.6 0.0 <total>
+
+# CHECK: [35] Code Region - G36
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 959
+# CHECK-NEXT: Total uOps: 4500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.69
+# CHECK-NEXT: IPC: 0.52
+# CHECK-NEXT: Block RThroughput: 9.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012345678
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . . ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,1] .DeeeeeeeeeER . . ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,2] . D=eeeeeeeeER . . ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,3] . D==eeeeeeeeER . ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,4] . D===eeeeeeeeeER ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 2. 1 2.0 1.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 3. 1 3.0 1.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: 4. 1 4.0 1.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: 1 2.2 0.8 0.0 <total>
+
+# CHECK: [36] Code Region - G37
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 1009
+# CHECK-NEXT: Total uOps: 4900
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.86
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012345678
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,1] .DeeeeeeeeeER . . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,2] . D=eeeeeeeeeER . ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,3] . DeeeeeeeeeER . ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,4] . .D==eeeeeeeeER ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 2. 1 2.0 2.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 3. 1 1.0 0.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 4. 1 3.0 2.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 1 1.6 1.0 0.0 <total>
+
+# CHECK: [37] Code Region - G38
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 1010
+# CHECK-NEXT: Total uOps: 4900
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.85
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . . ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,1] .DeeeeeeeeeER . . ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,2] . D==eeeeeeeeER. . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,3] . D==eeeeeeeeeER . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,4] . D===eeeeeeeeeER ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 2. 1 3.0 2.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 4. 1 4.0 2.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 1 2.4 1.0 0.0 <total>
+
+# CHECK: [38] Code Region - G39
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 4003
+# CHECK-NEXT: Total uOps: 4000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.00
+# CHECK-NEXT: IPC: 0.12
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: [0,1] .D=======eeeeeeeeER . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: [0,2] . D==============eeeeeeeeER . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: [0,3] . D=====================eeeeeeeeER. . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: [0,4] . D============================eeeeeeeeER ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: 2. 1 15.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: 3. 1 22.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: 4. 1 29.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: 1 15.0 0.2 0.0 <total>
+
+# CHECK: [39] Code Region - G40
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 4003
+# CHECK-NEXT: Total uOps: 4000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.00
+# CHECK-NEXT: IPC: 0.12
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789 0123456789
+# CHECK-NEXT: Index 0123456789 0123456789 012
+
+# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: [0,1] .D=======eeeeeeeeER . . . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: [0,2] . D==============eeeeeeeeER . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: [0,3] . D=====================eeeeeeeeER. . . ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: [0,4] . D============================eeeeeeeeER ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: 2. 1 15.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: 3. 1 22.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: 4. 1 29.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: 1 15.0 0.2 0.0 <total>
+
+# CHECK: [40] Code Region - G41
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 2103
+# CHECK-NEXT: Total uOps: 4100
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.95
+# CHECK-NEXT: IPC: 0.24
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeER . . . ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: [0,1] .D=======eeeeeeeeER . . ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: [0,2] . D=======eeeeeeeeER. . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: [0,3] . D=========eeeeeeeeER. ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
+# CHECK-NEXT: [0,4] . D=========eeeeeeeeER ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: 2. 1 8.0 0.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: 3. 1 10.0 2.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32
+# CHECK-NEXT: 4. 1 10.0 0.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16
+# CHECK-NEXT: 1 7.4 0.6 0.0 <total>
+
+# CHECK: [41] Code Region - G42
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 1008
+# CHECK-NEXT: Total uOps: 4300
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.27
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012345678
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
+# CHECK-NEXT: [0,1] .DeeeeeeeeER . . ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
+# CHECK-NEXT: [0,2] . D==eeeeeeeeER. . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
+# CHECK-NEXT: [0,3] . D==eeeeeeeeER . ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
+# CHECK-NEXT: [0,4] . D====eeeeeeeeER ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16
+# CHECK-NEXT: 2. 1 3.0 2.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8
+# CHECK-NEXT: 4. 1 5.0 2.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4
+# CHECK-NEXT: 1 2.6 1.0 0.0 <total>
+
+# CHECK: [42] Code Region - G43
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 1008
+# CHECK-NEXT: Total uOps: 4200
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.17
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012345678
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: [0,1] .DeeeeeeeeER . . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,2] . D==eeeeeeeeER. . ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,3] . D==eeeeeeeeER . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,4] . D====eeeeeeeeER ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 2. 1 3.0 2.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 4. 1 5.0 2.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 1 2.6 1.0 0.0 <total>
+
+# CHECK: [43] Code Region - G44
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 708
+# CHECK-NEXT: Total uOps: 3400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.80
+# CHECK-NEXT: IPC: 0.71
+# CHECK-NEXT: Block RThroughput: 6.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeeeER . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: [0,1] .DeeeeeeeeER . ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,2] . D==eeeeeeeeER ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,3] . D==eeeeeeE-R ldp s1, s2, [x27], #248
+# CHECK-NEXT: [0,4] . D===eeeeeeER ldp d1, d2, [x27], #496
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 2. 1 3.0 2.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 3. 1 3.0 0.0 1.0 ldp s1, s2, [x27], #248
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ldp d1, d2, [x27], #496
+# CHECK-NEXT: 1 2.4 0.6 0.2 <total>
+
+# CHECK: [44] Code Region - G45
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 507
+# CHECK-NEXT: Total uOps: 2300
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.54
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 2.3
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER .. ldp q1, q2, [x27], #992
+# CHECK-NEXT: [0,1] D=eeeeeeER.. ldp s1, s2, [x27, #248]!
+# CHECK-NEXT: [0,2] .D=eeeeeeER. ldp d1, d2, [x27, #496]!
+# CHECK-NEXT: [0,3] .D==eeeeeeER ldp q1, q2, [x27, #992]!
+# CHECK-NEXT: [0,4] . D==eeeeE-R ldp w1, w2, [x27], #248
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldp q1, q2, [x27], #992
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ldp s1, s2, [x27, #248]!
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldp d1, d2, [x27, #496]!
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ldp q1, q2, [x27, #992]!
+# CHECK-NEXT: 4. 1 3.0 0.0 1.0 ldp w1, w2, [x27], #248
+# CHECK-NEXT: 1 2.2 0.2 0.2 <total>
+
+# CHECK: [45] Code Region - G46
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 507
+# CHECK-NEXT: Total uOps: 2100
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.14
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 3.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER .. ldp x1, x2, [x27], #496
+# CHECK-NEXT: [0,1] D=eeeeER .. ldp w1, w2, [x27, #248]!
+# CHECK-NEXT: [0,2] D==eeeeER .. ldp x1, x2, [x27, #496]!
+# CHECK-NEXT: [0,3] .D==eeeeeER. ldpsw x1, x2, [x27], #248
+# CHECK-NEXT: [0,4] . D==eeeeeER ldpsw x1, x2, [x27, #248]!
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldp x1, x2, [x27], #496
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ldp w1, w2, [x27, #248]!
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ldp x1, x2, [x27, #496]!
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ldpsw x1, x2, [x27], #248
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ldpsw x1, x2, [x27, #248]!
+# CHECK-NEXT: 1 2.4 0.2 0.0 <total>
+
+# CHECK: [46] Code Region - G47
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 508
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.95
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 1.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . ldr b1, [x27], #254
+# CHECK-NEXT: [0,1] D=eeeeeeER. . ldr h1, [x27], #254
+# CHECK-NEXT: [0,2] D==eeeeeeER . ldr s1, [x27], #254
+# CHECK-NEXT: [0,3] .D==eeeeeeER. ldr d1, [x27], #254
+# CHECK-NEXT: [0,4] .D===eeeeeeER ldr q1, [x27], #254
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr b1, [x27], #254
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ldr h1, [x27], #254
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ldr s1, [x27], #254
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ldr d1, [x27], #254
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ldr q1, [x27], #254
+# CHECK-NEXT: 1 2.6 0.2 0.0 <total>
+
+# CHECK: [47] Code Region - G48
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 508
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.95
+# CHECK-NEXT: IPC: 0.98
+# CHECK-NEXT: Block RThroughput: 1.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . ldr b1, [x27, #254]!
+# CHECK-NEXT: [0,1] D=eeeeeeER. . ldr h1, [x27, #254]!
+# CHECK-NEXT: [0,2] D==eeeeeeER . ldr s1, [x27, #254]!
+# CHECK-NEXT: [0,3] .D==eeeeeeER. ldr d1, [x27, #254]!
+# CHECK-NEXT: [0,4] .D===eeeeeeER ldr q1, [x27, #254]!
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr b1, [x27, #254]!
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ldr h1, [x27, #254]!
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ldr s1, [x27, #254]!
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ldr d1, [x27, #254]!
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ldr q1, [x27, #254]!
+# CHECK-NEXT: 1 2.6 0.2 0.0 <total>
+
+# CHECK: [48] Code Region - G49
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 506
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.98
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 1.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . ldr w1, [x27], #254
+# CHECK-NEXT: [0,1] D=eeeeER . ldr x1, [x27], #254
+# CHECK-NEXT: [0,2] D==eeeeER . ldr w1, [x27, #254]!
+# CHECK-NEXT: [0,3] D===eeeeER. ldr x1, [x27, #254]!
+# CHECK-NEXT: [0,4] D====eeeeER ldrb w1, [x27], #254
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr w1, [x27], #254
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ldr x1, [x27], #254
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ldr w1, [x27, #254]!
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ldr x1, [x27, #254]!
+# CHECK-NEXT: 4. 1 5.0 0.0 0.0 ldrb w1, [x27], #254
+# CHECK-NEXT: 1 3.0 0.2 0.0 <total>
+
+# CHECK: [49] Code Region - G50
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 506
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.98
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 1.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . ldrb w1, [x27, #254]!
+# CHECK-NEXT: [0,1] D=eeeeER . ldrh w1, [x27], #254
+# CHECK-NEXT: [0,2] D==eeeeER . ldrh w1, [x27, #254]!
+# CHECK-NEXT: [0,3] D===eeeeER. ldrsb w1, [x27], #254
+# CHECK-NEXT: [0,4] D====eeeeER ldrsb x1, [x27], #254
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldrb w1, [x27, #254]!
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ldrh w1, [x27], #254
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ldrh w1, [x27, #254]!
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ldrsb w1, [x27], #254
+# CHECK-NEXT: 4. 1 5.0 0.0 0.0 ldrsb x1, [x27], #254
+# CHECK-NEXT: 1 3.0 0.2 0.0 <total>
+
+# CHECK: [50] Code Region - G51
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 506
+# CHECK-NEXT: Total uOps: 1000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 1.98
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 1.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . ldrsb w1, [x27, #254]!
+# CHECK-NEXT: [0,1] D=eeeeER . ldrsb x1, [x27, #254]!
+# CHECK-NEXT: [0,2] D==eeeeER . ldrsh w1, [x27], #254
+# CHECK-NEXT: [0,3] D===eeeeER. ldrsh x1, [x27], #254
+# CHECK-NEXT: [0,4] D====eeeeER ldrsh w1, [x27, #254]!
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldrsb w1, [x27, #254]!
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ldrsb x1, [x27, #254]!
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ldrsh w1, [x27], #254
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ldrsh x1, [x27], #254
+# CHECK-NEXT: 4. 1 5.0 0.0 0.0 ldrsh w1, [x27, #254]!
+# CHECK-NEXT: 1 3.0 0.2 0.0 <total>
+
+# CHECK: [51] Code Region - G52
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 504
+# CHECK-NEXT: Total uOps: 1200
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.38
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 1.2
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeeeeER . ldrsh x1, [x27, #254]!
+# CHECK-NEXT: [0,1] D=eeeeER. ldrsw x1, [x27], #254
+# CHECK-NEXT: [0,2] D==eeeeER ldrsw x1, [x27, #254]!
+# CHECK-NEXT: [0,3] D===eeE-R st1 { v1.1d }, [x27], #8
+# CHECK-NEXT: [0,4] .D===eeER st1 { v1.2d }, [x27], #16
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldrsh x1, [x27, #254]!
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ldrsw x1, [x27], #254
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ldrsw x1, [x27, #254]!
+# CHECK-NEXT: 3. 1 4.0 0.0 1.0 st1 { v1.1d }, [x27], #8
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.2d }, [x27], #16
+# CHECK-NEXT: 1 2.8 0.2 0.2 <total>
+
+# CHECK: [52] Code Region - G53
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 504
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.98
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeeER. . st1 { v1.2s }, [x27], #8
+# CHECK-NEXT: [0,1] D=eeER . st1 { v1.4h }, [x27], #8
+# CHECK-NEXT: [0,2] D==eeER . st1 { v1.4s }, [x27], #16
+# CHECK-NEXT: [0,3] .D==eeER. st1 { v1.8b }, [x27], #8
+# CHECK-NEXT: [0,4] .D===eeER st1 { v1.8h }, [x27], #16
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2s }, [x27], #8
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.4h }, [x27], #8
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.4s }, [x27], #16
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 st1 { v1.8b }, [x27], #8
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.8h }, [x27], #16
+# CHECK-NEXT: 1 2.6 0.2 0.0 <total>
+
+# CHECK: [53] Code Region - G54
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 504
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.98
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeeER. . st1 { v1.16b }, [x27], #16
+# CHECK-NEXT: [0,1] D=eeER . st1 { v1.1d }, [x27], x28
+# CHECK-NEXT: [0,2] D==eeER . st1 { v1.2d }, [x27], x28
+# CHECK-NEXT: [0,3] .D==eeER. st1 { v1.2s }, [x27], x28
+# CHECK-NEXT: [0,4] .D===eeER st1 { v1.4h }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.16b }, [x27], #16
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.1d }, [x27], x28
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.2d }, [x27], x28
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 st1 { v1.2s }, [x27], x28
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.4h }, [x27], x28
+# CHECK-NEXT: 1 2.6 0.2 0.0 <total>
+
+# CHECK: [54] Code Region - G55
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 504
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.98
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeeER. . st1 { v1.4s }, [x27], x28
+# CHECK-NEXT: [0,1] D=eeER . st1 { v1.8b }, [x27], x28
+# CHECK-NEXT: [0,2] D==eeER . st1 { v1.8h }, [x27], x28
+# CHECK-NEXT: [0,3] .D==eeER. st1 { v1.16b }, [x27], x28
+# CHECK-NEXT: [0,4] .D===eeER st1 { v1.1d, v2.1d }, [x27], #16
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4s }, [x27], x28
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.8b }, [x27], x28
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.8h }, [x27], x28
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 st1 { v1.16b }, [x27], x28
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: 1 2.6 0.2 0.0 <total>
+
+# CHECK: [55] Code Region - G56
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 504
+# CHECK-NEXT: Total uOps: 1900
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.77
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 3.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeeER. . st1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,1] D=eeER . st1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,2] .D=eeER . st1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,3] .D==eeER. st1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,4] . D==eeER st1 { v1.8b, v2.8b }, [x27], #16
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: 1 2.2 0.2 0.0 <total>
+
+# CHECK: [56] Code Region - G57
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 504
+# CHECK-NEXT: Total uOps: 2100
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.17
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeeER. . st1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,1] D=eeER . st1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,2] .D=eeER . st1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: [0,3] .D==eeER. st1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,4] . D==eeER st1 { v1.2s, v2.2s }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 st1 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 1 2.2 0.2 0.0 <total>
+
+# CHECK: [57] Code Region - G58
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 504
+# CHECK-NEXT: Total uOps: 2100
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.17
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 4.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeeER. . st1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,1] D=eeER . st1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: [0,2] .D=eeER . st1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,3] .D==eeER. st1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,4] . D==eeER st1 { v1.16b, v2.16b }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 st1 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 1 2.2 0.2 0.0 <total>
+
+# CHECK: [58] Code Region - G59
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 703
+# CHECK-NEXT: Total uOps: 2900
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.13
+# CHECK-NEXT: IPC: 0.71
+# CHECK-NEXT: Block RThroughput: 6.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . st1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: [0,1] .DeeER . st1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,2] . DeeER . st1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,3] . D==eeER. st1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: [0,4] . D==eeER st1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], #24
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: 3. 1 3.0 1.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: 1 1.8 0.4 0.0 <total>
+
+# CHECK: [59] Code Region - G60
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 703
+# CHECK-NEXT: Total uOps: 3100
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.41
+# CHECK-NEXT: IPC: 0.71
+# CHECK-NEXT: Block RThroughput: 6.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . st1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,1] .DeeER . st1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,2] . DeeER . st1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,3] . D=eeER. st1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: [0,4] . D=eeER st1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 3. 1 2.0 1.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], x28
+# CHECK-NEXT: 4. 1 2.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 1 1.4 0.4 0.0 <total>
+
+# CHECK: [60] Code Region - G61
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 703
+# CHECK-NEXT: Total uOps: 2900
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.13
+# CHECK-NEXT: IPC: 0.71
+# CHECK-NEXT: Block RThroughput: 6.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . st1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,1] D=eeER . st1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,2] .D=eeER . st1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,3] . D=eeER . st1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,4] . D==eeER st1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 3. 1 2.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 4. 1 3.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 1 2.0 0.4 0.0 <total>
+
+# CHECK: [61] Code Region - G62
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 704
+# CHECK-NEXT: Total uOps: 3100
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.40
+# CHECK-NEXT: IPC: 0.71
+# CHECK-NEXT: Block RThroughput: 6.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . st1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,1] .DeeER . st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: [0,2] . D=eeER . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,3] . D=eeER . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: [0,4] . D===eeER st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32
+# CHECK-NEXT: 2. 1 2.0 1.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 3. 1 2.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 4. 1 4.0 1.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: 1 2.0 0.6 0.0 <total>
+
+# CHECK: [62] Code Region - G63
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 804
+# CHECK-NEXT: Total uOps: 3700
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.60
+# CHECK-NEXT: IPC: 0.62
+# CHECK-NEXT: Block RThroughput: 8.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. .. st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,1] .DeeER .. st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,2] . D=eeER .. st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,3] . D=eeER .. st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: [0,4] . D===eeER st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: 2. 1 2.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 3. 1 2.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 4. 1 4.0 2.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28
+# CHECK-NEXT: 1 2.0 0.8 0.0 <total>
+
+# CHECK: [63] Code Region - G64
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 703
+# CHECK-NEXT: Total uOps: 3300
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.69
+# CHECK-NEXT: IPC: 0.71
+# CHECK-NEXT: Block RThroughput: 7.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,1] .DeeER . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,2] .D==eeER . st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,3] . D==eeER. st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,4] . D==eeER st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 2. 1 3.0 1.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 1 2.2 0.4 0.0 <total>
+
+# CHECK: [64] Code Region - G65
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 706
+# CHECK-NEXT: Total uOps: 3000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 4.25
+# CHECK-NEXT: IPC: 0.71
+# CHECK-NEXT: Block RThroughput: 7.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,1] .DeeER . . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,2] . D==eeeeER . st1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: [0,3] . D===eeeeER. st1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: [0,4] . D===eeeeER st1 { v1.b }[0], [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 2. 1 3.0 2.0 0.0 st1 { v1.b }[0], [x27], #1
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st1 { v1.b }[8], [x27], #1
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.b }[0], [x27], x28
+# CHECK-NEXT: 1 2.6 0.6 0.0 <total>
+
+# CHECK: [65] Code Region - G66
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 506
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.95
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . st1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: [0,1] D=eeeeER . st1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: [0,2] .D=eeeeER . st1 { v1.h }[4], [x27], #2
+# CHECK-NEXT: [0,3] .D==eeeeER. st1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: [0,4] . D==eeeeER st1 { v1.h }[4], [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.b }[8], [x27], x28
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.h }[0], [x27], #2
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.h }[4], [x27], #2
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 st1 { v1.h }[0], [x27], x28
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st1 { v1.h }[4], [x27], x28
+# CHECK-NEXT: 1 2.2 0.2 0.0 <total>
+
+# CHECK: [66] Code Region - G67
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 605
+# CHECK-NEXT: Total uOps: 2300
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.80
+# CHECK-NEXT: IPC: 0.83
+# CHECK-NEXT: Block RThroughput: 6.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . st1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: [0,1] D=eeeeER . st1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: [0,2] .D=eeeeER . st1 { v1.d }[0], [x27], #8
+# CHECK-NEXT: [0,3] .D==eeeeER. st1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: [0,4] . D==eeeeER st2 { v1.2d, v2.2d }, [x27], #32
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.s }[0], [x27], #4
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.s }[0], [x27], x28
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.d }[0], [x27], #8
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 st1 { v1.d }[0], [x27], x28
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: 1 2.2 0.2 0.0 <total>
+
+# CHECK: [67] Code Region - G68
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 705
+# CHECK-NEXT: Total uOps: 2600
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.69
+# CHECK-NEXT: IPC: 0.71
+# CHECK-NEXT: Block RThroughput: 7.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER .. st2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,1] D=eeeeER .. st2 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,2] .D=eeeeER .. st2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: [0,3] . D=eeeeER.. st2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,4] . D==eeeeER st2 { v1.8h, v2.8h }, [x27], #32
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], #32
+# CHECK-NEXT: 3. 1 2.0 0.0 0.0 st2 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: 4. 1 3.0 1.0 0.0 st2 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: 1 2.0 0.4 0.0 <total>
+
+# CHECK: [68] Code Region - G69
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 805
+# CHECK-NEXT: Total uOps: 2900
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.60
+# CHECK-NEXT: IPC: 0.62
+# CHECK-NEXT: Block RThroughput: 8.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . . st2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,1] .DeeeeER . . st2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: [0,2] . D==eeeeER . st2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: [0,3] . D===eeeeER. st2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: [0,4] . D===eeeeER st2 { v1.4s, v2.4s }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], x28
+# CHECK-NEXT: 2. 1 3.0 2.0 0.0 st2 { v1.2s, v2.2s }, [x27], x28
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], x28
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], x28
+# CHECK-NEXT: 1 2.6 0.6 0.0 <total>
+
+# CHECK: [69] Code Region - G70
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 706
+# CHECK-NEXT: Total uOps: 2600
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.68
+# CHECK-NEXT: IPC: 0.71
+# CHECK-NEXT: Block RThroughput: 7.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . . st2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: [0,1] .DeeeeER . . st2 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: [0,2] . DeeeeER . . st2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: [0,3] . D==eeeeER. st2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: [0,4] . D===eeeeER st2 { v1.b, v2.b }[8], [x27], #2
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.8b, v2.8b }, [x27], x28
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 st2 { v1.8h, v2.8h }, [x27], x28
+# CHECK-NEXT: 2. 1 1.0 0.0 0.0 st2 { v1.16b, v2.16b }, [x27], x28
+# CHECK-NEXT: 3. 1 3.0 2.0 0.0 st2 { v1.b, v2.b }[0], [x27], #2
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], #2
+# CHECK-NEXT: 1 2.0 0.6 0.0 <total>
+
+# CHECK: [70] Code Region - G71
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 506
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.95
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . st2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: [0,1] D=eeeeER . st2 { v1.b, v2.b }[8], [x27], x28
+# CHECK-NEXT: [0,2] .D=eeeeER . st2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: [0,3] .D==eeeeER. st2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: [0,4] . D==eeeeER st2 { v1.h, v2.h }[0], [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.b, v2.b }[0], [x27], x28
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], x28
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], #4
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 st2 { v1.h, v2.h }[4], [x27], #4
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], x28
+# CHECK-NEXT: 1 2.2 0.2 0.0 <total>
+
+# CHECK: [71] Code Region - G72
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 506
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.95
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 5.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . st2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: [0,1] D=eeeeER . st2 { v1.s, v2.s }[0], [x27], #8
+# CHECK-NEXT: [0,2] .D=eeeeER . st2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: [0,3] .D==eeeeER. st2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: [0,4] . D==eeeeER st2 { v1.d, v2.d }[0], [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.h, v2.h }[4], [x27], x28
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], #8
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], x28
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], #16
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], x28
+# CHECK-NEXT: 1 2.2 0.2 0.0 <total>
+
+# CHECK: [72] Code Region - G73
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 807
+# CHECK-NEXT: Total uOps: 3000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.72
+# CHECK-NEXT: IPC: 0.62
+# CHECK-NEXT: Block RThroughput: 7.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeER . . . st2g x26, [x27], #4064
+# CHECK-NEXT: [0,1] D=eER. . . st2g x26, [x27, #4064]!
+# CHECK-NEXT: [0,2] .D=eeeeeeER . st3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: [0,3] . D=eeeeeER . st3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: [0,4] . D====eeeeeER st3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2g x26, [x27], #4064
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st2g x26, [x27, #4064]!
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], #48
+# CHECK-NEXT: 3. 1 2.0 0.0 0.0 st3 { v1.2s, v2.2s, v3.2s }, [x27], #24
+# CHECK-NEXT: 4. 1 5.0 3.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], #24
+# CHECK-NEXT: 1 2.4 0.8 0.0 <total>
+
+# CHECK: [73] Code Region - G74
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 1405
+# CHECK-NEXT: Total uOps: 4700
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.35
+# CHECK-NEXT: IPC: 0.36
+# CHECK-NEXT: Block RThroughput: 14.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . . st3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: [0,1] .DeeeeeER . . . st3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: [0,2] . D===eeeeeeER . . st3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: [0,3] . D===eeeeeeER. . st3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: [0,4] . D=======eeeeeeER st3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], #48
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], #24
+# CHECK-NEXT: 2. 1 4.0 3.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], #48
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], #48
+# CHECK-NEXT: 4. 1 8.0 4.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], x28
+# CHECK-NEXT: 1 3.6 1.6 0.0 <total>
+
+# CHECK: [74] Code Region - G75
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 1206
+# CHECK-NEXT: Total uOps: 4100
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.40
+# CHECK-NEXT: IPC: 0.41
+# CHECK-NEXT: Block RThroughput: 12.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234567
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeER . . . st3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: [0,1] .DeeeeeER . . . st3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: [0,2] . D==eeeeeeER . . st3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: [0,3] . D==eeeeeER . . st3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: [0,4] . D=====eeeeeeER st3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.2s, v2.2s, v3.2s }, [x27], x28
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], x28
+# CHECK-NEXT: 2. 1 3.0 2.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], x28
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], x28
+# CHECK-NEXT: 4. 1 6.0 3.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], x28
+# CHECK-NEXT: 1 2.8 1.2 0.0 <total>
+
+# CHECK: [75] Code Region - G76
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 1106
+# CHECK-NEXT: Total uOps: 3800
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.44
+# CHECK-NEXT: IPC: 0.45
+# CHECK-NEXT: Block RThroughput: 11.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER . .. st3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: [0,1] .DeeeeeER . .. st3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: [0,2] . D===eeeeeER .. st3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: [0,3] . D===eeeeeER .. st3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: [0,4] . D=====eeeeeER st3 { v1.b, v2.b, v3.b }[8], [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], x28
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], #3
+# CHECK-NEXT: 2. 1 4.0 3.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], #3
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], x28
+# CHECK-NEXT: 4. 1 6.0 2.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], x28
+# CHECK-NEXT: 1 3.2 1.2 0.0 <total>
+
+# CHECK: [76] Code Region - G77
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 1005
+# CHECK-NEXT: Total uOps: 3500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.48
+# CHECK-NEXT: IPC: 0.50
+# CHECK-NEXT: Block RThroughput: 10.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 012345
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeER . . st3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: [0,1] .DeeeeeER . . st3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: [0,2] . D==eeeeeER . st3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: [0,3] . D==eeeeeER . st3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: [0,4] . D====eeeeeER st3 { v1.s, v2.s, v3.s }[0], [x27], #12
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], #6
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], #6
+# CHECK-NEXT: 2. 1 3.0 2.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], x28
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], x28
+# CHECK-NEXT: 4. 1 5.0 2.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], #12
+# CHECK-NEXT: 1 2.6 1.0 0.0 <total>
+
+# CHECK: [77] Code Region - G78
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 1304
+# CHECK-NEXT: Total uOps: 4300
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.30
+# CHECK-NEXT: IPC: 0.38
+# CHECK-NEXT: Block RThroughput: 13.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeER . .. st3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: [0,1] .DeeeeeER . .. st3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: [0,2] . D==eeeeeER .. st3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: [0,3] . D==eeeeeER .. st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: [0,4] . D===eeeeeeER st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], x28
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], #24
+# CHECK-NEXT: 2. 1 3.0 2.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], x28
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64
+# CHECK-NEXT: 4. 1 4.0 2.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32
+# CHECK-NEXT: 1 2.4 1.0 0.0 <total>
+
+# CHECK: [78] Code Region - G79
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 2399
+# CHECK-NEXT: Total uOps: 6900
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.88
+# CHECK-NEXT: IPC: 0.21
+# CHECK-NEXT: Block RThroughput: 24.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012
+
+# CHECK: [0,0] DeeeeeeER . . . . st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: [0,1] .DeeeeeeeER . . . st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: [0,2] . D===eeeeeeER. . . st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: [0,3] . D========eeeeeeeER. st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: [0,4] . .D=======eeeeeeeER st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32
+# CHECK-NEXT: 1. 1 1.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64
+# CHECK-NEXT: 2. 1 4.0 4.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32
+# CHECK-NEXT: 3. 1 9.0 5.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64
+# CHECK-NEXT: 4. 1 8.0 0.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64
+# CHECK-NEXT: 1 4.6 2.0 0.0 <total>
+
+# CHECK: [79] Code Region - G80
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 1904
+# CHECK-NEXT: Total uOps: 5700
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.99
+# CHECK-NEXT: IPC: 0.26
+# CHECK-NEXT: Block RThroughput: 19.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 012
+
+# CHECK: [0,0] DeeeeeER . . . . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeeeeeER . . . st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: [0,2] . D=====eeeeeeER . . st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: [0,3] . D=====eeeeeeeER . . st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: [0,4] . .D========eeeeeeER st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28
+# CHECK-NEXT: 1. 1 1.0 1.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28
+# CHECK-NEXT: 2. 1 6.0 5.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28
+# CHECK-NEXT: 3. 1 6.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28
+# CHECK-NEXT: 4. 1 9.0 4.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28
+# CHECK-NEXT: 1 4.6 2.2 0.0 <total>
+
+# CHECK: [80] Code Region - G81
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 1658
+# CHECK-NEXT: Total uOps: 4900
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.96
+# CHECK-NEXT: IPC: 0.30
+# CHECK-NEXT: Block RThroughput: 16.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0123456789
+# CHECK-NEXT: Index 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeER. . . . st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: [0,1] . DeeeeeeeER . . . st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: [0,2] . D========eeeeeeER . st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: [0,3] . D==========eeeeeeER. st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: [0,4] . D==========eeeeeeER st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28
+# CHECK-NEXT: 1. 1 1.0 1.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28
+# CHECK-NEXT: 2. 1 9.0 9.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4
+# CHECK-NEXT: 3. 1 11.0 1.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4
+# CHECK-NEXT: 4. 1 11.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28
+# CHECK-NEXT: 1 6.6 2.4 0.0 <total>
+
+# CHECK: [81] Code Region - G82
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 757
+# CHECK-NEXT: Total uOps: 2500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.30
+# CHECK-NEXT: IPC: 0.66
+# CHECK-NEXT: Block RThroughput: 7.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 01234
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER . . st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: [0,1] D=eeeeeeER. . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: [0,2] .D==eeeeeeER . st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: [0,3] .D===eeeeeeER . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: [0,4] . D====eeeeeeER st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8
+# CHECK-NEXT: 2. 1 3.0 1.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28
+# CHECK-NEXT: 4. 1 5.0 1.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28
+# CHECK-NEXT: 1 3.0 0.6 0.0 <total>
+
+# CHECK: [82] Code Region - G83
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 704
+# CHECK-NEXT: Total uOps: 2700
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.84
+# CHECK-NEXT: IPC: 0.71
+# CHECK-NEXT: Block RThroughput: 7.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeeeER . st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: [0,1] D=eeeeeeER. st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: [0,2] .D==eeeeER. st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: [0,3] . D==eeeeER st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: [0,4] . D===eE--R stg x26, [x27], #4064
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28
+# CHECK-NEXT: 2. 1 3.0 1.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28
+# CHECK-NEXT: 4. 1 4.0 0.0 2.0 stg x26, [x27], #4064
+# CHECK-NEXT: 1 2.6 0.4 0.4 <total>
+
+# CHECK: [83] Code Region - G84
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 504
+# CHECK-NEXT: Total uOps: 1700
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.37
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeER . . stg x26, [x27, #4064]!
+# CHECK-NEXT: [0,1] D=eER. . stgp x1, x2, [x27], #992
+# CHECK-NEXT: [0,2] D==eER . stgp x1, x2, [x27, #992]!
+# CHECK-NEXT: [0,3] .D==eeER. stp s1, s2, [x27], #248
+# CHECK-NEXT: [0,4] .D===eeER stp d1, d2, [x27], #496
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 stg x26, [x27, #4064]!
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 stgp x1, x2, [x27], #992
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 stgp x1, x2, [x27, #992]!
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 stp s1, s2, [x27], #248
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 stp d1, d2, [x27], #496
+# CHECK-NEXT: 1 2.6 0.2 0.0 <total>
+
+# CHECK: [84] Code Region - G85
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 703
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.84
+# CHECK-NEXT: IPC: 0.71
+# CHECK-NEXT: Block RThroughput: 3.0
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeER. . stp q1, q2, [x27], #992
+# CHECK-NEXT: [0,1] D==eeER . stp s1, s2, [x27, #248]!
+# CHECK-NEXT: [0,2] .D==eeER . stp d1, d2, [x27, #496]!
+# CHECK-NEXT: [0,3] .D===eeER. stp q1, q2, [x27, #992]!
+# CHECK-NEXT: [0,4] . D====eER stp w1, w2, [x27], #248
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 stp q1, q2, [x27], #992
+# CHECK-NEXT: 1. 1 3.0 0.0 0.0 stp s1, s2, [x27, #248]!
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 stp d1, d2, [x27, #496]!
+# CHECK-NEXT: 3. 1 4.0 0.0 0.0 stp q1, q2, [x27, #992]!
+# CHECK-NEXT: 4. 1 5.0 0.0 0.0 stp w1, w2, [x27], #248
+# CHECK-NEXT: 1 3.2 0.2 0.0 <total>
+
+# CHECK: [85] Code Region - G86
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 504
+# CHECK-NEXT: Total uOps: 1700
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.37
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeER . . stp x1, x2, [x27], #496
+# CHECK-NEXT: [0,1] D=eER. . stp w1, w2, [x27, #248]!
+# CHECK-NEXT: [0,2] D==eER . stp x1, x2, [x27, #496]!
+# CHECK-NEXT: [0,3] .D==eeER. str b1, [x27], #254
+# CHECK-NEXT: [0,4] .D===eeER str h1, [x27], #254
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 stp x1, x2, [x27], #496
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 stp w1, w2, [x27, #248]!
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 stp x1, x2, [x27, #496]!
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 str b1, [x27], #254
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 str h1, [x27], #254
+# CHECK-NEXT: 1 2.6 0.2 0.0 <total>
+
+# CHECK: [86] Code Region - G87
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 504
+# CHECK-NEXT: Total uOps: 2000
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.97
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345678
+
+# CHECK: [0,0] DeeER. . str s1, [x27], #254
+# CHECK-NEXT: [0,1] D=eeER . str d1, [x27], #254
+# CHECK-NEXT: [0,2] .D=eeER . str q1, [x27], #254
+# CHECK-NEXT: [0,3] .D==eeER. str b1, [x27, #254]!
+# CHECK-NEXT: [0,4] . D==eeER str h1, [x27, #254]!
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 str s1, [x27], #254
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 str d1, [x27], #254
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 str q1, [x27], #254
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 str b1, [x27, #254]!
+# CHECK-NEXT: 4. 1 3.0 0.0 0.0 str h1, [x27, #254]!
+# CHECK-NEXT: 1 2.2 0.2 0.0 <total>
+
+# CHECK: [87] Code Region - G88
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 503
+# CHECK-NEXT: Total uOps: 1800
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.58
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 01234567
+
+# CHECK: [0,0] DeeER. . str s1, [x27, #254]!
+# CHECK-NEXT: [0,1] D=eeER . str d1, [x27, #254]!
+# CHECK-NEXT: [0,2] .D=eeER. str q1, [x27, #254]!
+# CHECK-NEXT: [0,3] .D==eER. str w1, [x27], #254
+# CHECK-NEXT: [0,4] .D===eER str x1, [x27], #254
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 str s1, [x27, #254]!
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 str d1, [x27, #254]!
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 str q1, [x27, #254]!
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 str w1, [x27], #254
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 str x1, [x27], #254
+# CHECK-NEXT: 1 2.4 0.2 0.0 <total>
+
+# CHECK: [88] Code Region - G89
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 503
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.98
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 01234567
+
+# CHECK: [0,0] DeER . . str w1, [x27, #254]!
+# CHECK-NEXT: [0,1] D=eER. . str x1, [x27, #254]!
+# CHECK-NEXT: [0,2] D==eER . strb w1, [x27], #254
+# CHECK-NEXT: [0,3] .D==eER. strb w1, [x27, #254]!
+# CHECK-NEXT: [0,4] .D===eER strh w1, [x27], #254
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 str w1, [x27, #254]!
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 str x1, [x27, #254]!
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 strb w1, [x27], #254
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 strb w1, [x27, #254]!
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 strh w1, [x27], #254
+# CHECK-NEXT: 1 2.6 0.2 0.0 <total>
+
+# CHECK: [89] Code Region - G90
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 500
+# CHECK-NEXT: Total Cycles: 503
+# CHECK-NEXT: Total uOps: 1500
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 2.98
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 2.5
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 01234567
+
+# CHECK: [0,0] DeER . . strh w1, [x27, #254]!
+# CHECK-NEXT: [0,1] D=eER. . stz2g x26, [x27], #4064
+# CHECK-NEXT: [0,2] D==eER . stz2g x26, [x27, #4064]!
+# CHECK-NEXT: [0,3] .D==eER. stzg x26, [x27], #4064
+# CHECK-NEXT: [0,4] .D===eER stzg x26, [x27, #4064]!
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 strh w1, [x27, #254]!
+# CHECK-NEXT: 1. 1 2.0 0.0 0.0 stz2g x26, [x27], #4064
+# CHECK-NEXT: 2. 1 3.0 0.0 0.0 stz2g x26, [x27, #4064]!
+# CHECK-NEXT: 3. 1 3.0 0.0 0.0 stzg x26, [x27], #4064
+# CHECK-NEXT: 4. 1 4.0 0.0 0.0 stzg x26, [x27, #4064]!
+# CHECK-NEXT: 1 2.6 0.2 0.0 <total>
+
+# CHECK: [90] Code Region - G91
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 200
+# CHECK-NEXT: Total Cycles: 110
+# CHECK-NEXT: Total uOps: 400
+
+# CHECK: Dispatch Width: 10
+# CHECK-NEXT: uOps Per Cycle: 3.64
+# CHECK-NEXT: IPC: 1.82
+# CHECK-NEXT: Block RThroughput: 0.7
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeeeER . ldr x1, [x27], #254
+# CHECK-NEXT: [0,1] D====eeeeER ldr x2, [x1], #254
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr x1, [x27], #254
+# CHECK-NEXT: 1. 1 5.0 0.0 0.0 ldr x2, [x1], #254
+# CHECK-NEXT: 1 3.0 0.5 0.0 <total>
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-zero-lat-movs.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-zero-lat-movs.s
new file mode 100644
index 0000000..a0840dc
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-zero-lat-movs.s
@@ -0,0 +1,81 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=neoverse-v3ae -instruction-tables < %s | FileCheck %s
+
+mov x1, #0
+mov x1, xzr
+mov w1, #0
+mov w1, wzr
+fmov h1, wzr
+fmov h1, xzr
+fmov s1, wzr
+fmov d1, xzr
+movi d1, #0
+movi v1.2d, #0
+mov w1, w2
+mov x1, x2
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 0 0.10 mov x1, #0
+# CHECK-NEXT: 1 0 0.10 mov x1, xzr
+# CHECK-NEXT: 1 0 0.10 mov w1, #0
+# CHECK-NEXT: 1 0 0.10 mov w1, wzr
+# CHECK-NEXT: 1 0 0.10 fmov h1, wzr
+# CHECK-NEXT: 1 0 0.10 fmov h1, xzr
+# CHECK-NEXT: 1 0 0.10 fmov s1, wzr
+# CHECK-NEXT: 1 0 0.10 fmov d1, xzr
+# CHECK-NEXT: 1 0 0.10 movi d1, #0000000000000000
+# CHECK-NEXT: 1 0 0.10 movi v1.2d, #0000000000000000
+# CHECK-NEXT: 1 0 0.10 mov w1, w2
+# CHECK-NEXT: 1 0 0.10 mov x1, x2
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - V3AEUnitB
+# CHECK-NEXT: [0.1] - V3AEUnitB
+# CHECK-NEXT: [0.2] - V3AEUnitB
+# CHECK-NEXT: [1.0] - V3AEUnitD
+# CHECK-NEXT: [1.1] - V3AEUnitD
+# CHECK-NEXT: [2.0] - V3AEUnitFlg
+# CHECK-NEXT: [2.1] - V3AEUnitFlg
+# CHECK-NEXT: [2.2] - V3AEUnitFlg
+# CHECK-NEXT: [2.3] - V3AEUnitFlg
+# CHECK-NEXT: [3.0] - V3AEUnitL12
+# CHECK-NEXT: [3.1] - V3AEUnitL12
+# CHECK-NEXT: [4] - V3AEUnitLS0
+# CHECK-NEXT: [5] - V3AEUnitM0
+# CHECK-NEXT: [6] - V3AEUnitM1
+# CHECK-NEXT: [7] - V3AEUnitS0
+# CHECK-NEXT: [8] - V3AEUnitS1
+# CHECK-NEXT: [9] - V3AEUnitS2
+# CHECK-NEXT: [10] - V3AEUnitS3
+# CHECK-NEXT: [11] - V3AEUnitS4
+# CHECK-NEXT: [12] - V3AEUnitS5
+# CHECK-NEXT: [13] - V3AEUnitST1
+# CHECK-NEXT: [14] - V3AEUnitV0
+# CHECK-NEXT: [15] - V3AEUnitV1
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [0.2] [1.0] [1.1] [2.0] [2.1] [2.2] [2.3] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] Instructions:
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - mov x1, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - mov x1, xzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - mov w1, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - mov w1, wzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - fmov h1, wzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - fmov h1, xzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - fmov s1, wzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - fmov d1, xzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - movi d1, #0000000000000000
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - movi v1.2d, #0000000000000000
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - mov w1, w2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - mov x1, x2
diff --git a/llvm/unittests/IR/AbstractCallSiteTest.cpp b/llvm/unittests/IR/AbstractCallSiteTest.cpp
index ddb1091..623d1b3 100644
--- a/llvm/unittests/IR/AbstractCallSiteTest.cpp
+++ b/llvm/unittests/IR/AbstractCallSiteTest.cpp
@@ -6,8 +6,9 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/AsmParser/Parser.h"
#include "llvm/IR/AbstractCallSite.h"
+#include "llvm/AsmParser/Parser.h"
+#include "llvm/IR/Argument.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/SourceMgr.h"
@@ -51,5 +52,96 @@ TEST(AbstractCallSite, CallbackCall) {
EXPECT_TRUE(ACS);
EXPECT_TRUE(ACS.isCallbackCall());
EXPECT_TRUE(ACS.isCallee(CallbackUse));
+ EXPECT_EQ(ACS.getCalleeUseForCallback(), *CallbackUse);
EXPECT_EQ(ACS.getCalledFunction(), Callback);
+
+ // The callback metadata {CallbackNo, Arg0No, ..., isVarArg} = {1, -1, true}
+ EXPECT_EQ(ACS.getCallArgOperandNoForCallee(), 1);
+ // Though the callback metadata specifies only ONE unfixed argument number,
+ // the callback callee is vararg, hence the third call argument is also
+ // considered to be another argument for the callback.
+ EXPECT_EQ(ACS.getNumArgOperands(), 2u);
+ Argument *Param0 = Callback->getArg(0), *Param1 = Callback->getArg(1);
+ ASSERT_TRUE(Param0 && Param1);
+ EXPECT_EQ(ACS.getCallArgOperandNo(*Param0), -1);
+ EXPECT_EQ(ACS.getCallArgOperandNo(*Param1), 2);
+}
+
+TEST(AbstractCallSite, DirectCall) {
+ LLVMContext C;
+
+ const char *IR = "declare void @bar(i32 %x, i32 %y)\n"
+ "define void @foo() {\n"
+ " call void @bar(i32 1, i32 2)\n"
+ " ret void\n"
+ "}\n";
+
+ std::unique_ptr<Module> M = parseIR(C, IR);
+ ASSERT_TRUE(M);
+
+ Function *Callee = M->getFunction("bar");
+ ASSERT_NE(Callee, nullptr);
+
+ const Use *DirectCallUse = Callee->getSingleUndroppableUse();
+ ASSERT_NE(DirectCallUse, nullptr);
+
+ AbstractCallSite ACS(DirectCallUse);
+ EXPECT_TRUE(ACS);
+ EXPECT_TRUE(ACS.isDirectCall());
+ EXPECT_TRUE(ACS.isCallee(DirectCallUse));
+ EXPECT_EQ(ACS.getCalledFunction(), Callee);
+ EXPECT_EQ(ACS.getNumArgOperands(), 2u);
+ Argument *ArgX = Callee->getArg(0);
+ ASSERT_NE(ArgX, nullptr);
+ Value *CAO1 = ACS.getCallArgOperand(*ArgX);
+ Value *CAO2 = ACS.getCallArgOperand(0);
+ ASSERT_NE(CAO2, nullptr);
+ // The two call arg operands should be the same object, since they are both
+ // the first argument of the call.
+ EXPECT_EQ(CAO2, CAO1);
+
+ ConstantInt *FirstArgInt = dyn_cast<ConstantInt>(CAO2);
+ ASSERT_NE(FirstArgInt, nullptr);
+ EXPECT_EQ(FirstArgInt->getZExtValue(), 1ull);
+
+ EXPECT_EQ(ACS.getCallArgOperandNo(*ArgX), 0);
+ EXPECT_EQ(ACS.getCallArgOperandNo(0), 0);
+ EXPECT_EQ(ACS.getCallArgOperandNo(1), 1);
+}
+
+TEST(AbstractCallSite, IndirectCall) {
+ LLVMContext C;
+
+ const char *IR = "define void @foo(ptr %0) {\n"
+ " call void %0(i32 1, i32 2)\n"
+ " ret void\n"
+ "}\n";
+
+ std::unique_ptr<Module> M = parseIR(C, IR);
+ ASSERT_TRUE(M);
+
+ Function *Fun = M->getFunction("foo");
+ ASSERT_NE(Fun, nullptr);
+
+ Argument *ArgAsCallee = Fun->getArg(0);
+ ASSERT_NE(ArgAsCallee, nullptr);
+
+ const Use *IndCallUse = ArgAsCallee->getSingleUndroppableUse();
+ ASSERT_NE(IndCallUse, nullptr);
+
+ AbstractCallSite ACS(IndCallUse);
+ EXPECT_TRUE(ACS);
+ EXPECT_TRUE(ACS.isIndirectCall());
+ EXPECT_TRUE(ACS.isCallee(IndCallUse));
+ EXPECT_EQ(ACS.getCalledFunction(), nullptr);
+ EXPECT_EQ(ACS.getCalledOperand(), ArgAsCallee);
+ EXPECT_EQ(ACS.getNumArgOperands(), 2u);
+ Value *CalledOperand = ACS.getCallArgOperand(0);
+ ASSERT_NE(CalledOperand, nullptr);
+ ConstantInt *FirstArgInt = dyn_cast<ConstantInt>(CalledOperand);
+ ASSERT_NE(FirstArgInt, nullptr);
+ EXPECT_EQ(FirstArgInt->getZExtValue(), 1ull);
+
+ EXPECT_EQ(ACS.getCallArgOperandNo(0), 0);
+ EXPECT_EQ(ACS.getCallArgOperandNo(1), 1);
}
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
index c1791dfa..59a9ea1 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
@@ -1132,7 +1132,8 @@ TEST_F(VPRecipeTest, CastVPWidenMemoryRecipeToVPUserAndVPDef) {
new LoadInst(Int32, PoisonValue::get(Int32Ptr), "", false, Align(1));
VPValue *Addr = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1));
VPValue *Mask = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2));
- VPWidenLoadRecipe Recipe(*Load, Addr, Mask, true, false, {}, {});
+ VPWidenLoadRecipe Recipe(*Load, Addr, Mask, true, false, Load->getAlign(), {},
+ {});
EXPECT_TRUE(isa<VPUser>(&Recipe));
VPRecipeBase *BaseR = &Recipe;
EXPECT_TRUE(isa<VPUser>(BaseR));
@@ -1249,7 +1250,8 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) {
new LoadInst(Int32, PoisonValue::get(Int32Ptr), "", false, Align(1));
VPValue *Mask = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1));
VPValue *Addr = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2));
- VPWidenLoadRecipe Recipe(*Load, Addr, Mask, true, false, {}, {});
+ VPWidenLoadRecipe Recipe(*Load, Addr, Mask, true, false, Load->getAlign(),
+ {}, {});
EXPECT_FALSE(Recipe.mayHaveSideEffects());
EXPECT_TRUE(Recipe.mayReadFromMemory());
EXPECT_FALSE(Recipe.mayWriteToMemory());
@@ -1263,8 +1265,8 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) {
VPValue *Mask = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1));
VPValue *Addr = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2));
VPValue *StoredV = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 3));
- VPWidenStoreRecipe Recipe(*Store, Addr, StoredV, Mask, false, false, {},
- {});
+ VPWidenStoreRecipe Recipe(*Store, Addr, StoredV, Mask, false, false,
+ Store->getAlign(), {}, {});
EXPECT_TRUE(Recipe.mayHaveSideEffects());
EXPECT_FALSE(Recipe.mayReadFromMemory());
EXPECT_TRUE(Recipe.mayWriteToMemory());
diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py
index a5e3c39..8cd200c9 100644
--- a/llvm/utils/UpdateTestChecks/common.py
+++ b/llvm/utils/UpdateTestChecks/common.py
@@ -29,6 +29,7 @@ Version changelog:
'none' and 'all'. 'smart' is the default.
5: Basic block labels are matched by FileCheck expressions
6: The semantics of TBAA checks has been incorporated in the check lines.
+7: Indent switch-cases correctly.
"""
DEFAULT_VERSION = 6
@@ -606,6 +607,7 @@ MARCH_ARG_RE = re.compile(r"-march[= ]([^ ]+)")
DEBUG_ONLY_ARG_RE = re.compile(r"-debug-only[= ]([^ ]+)")
IS_DEBUG_RECORD_RE = re.compile(r"^(\s+)#dbg_")
+IS_SWITCH_CASE_RE = re.compile(r"^\s+i\d+ \d+, label %\w+")
SCRUB_LEADING_WHITESPACE_RE = re.compile(r"^(\s+)")
SCRUB_WHITESPACE_RE = re.compile(r"(?!^(| \w))[ \t]+", flags=re.M)
diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn
index 327b80b..3b0f38a 100644
--- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn
@@ -57,6 +57,7 @@ static_library("readability") {
"RedundantSmartptrGetCheck.cpp",
"RedundantStringCStrCheck.cpp",
"RedundantStringInitCheck.cpp",
+ "RedundantTypenameCheck.cpp",
"ReferenceToConstructedTemporaryCheck.cpp",
"SimplifyBooleanExprCheck.cpp",
"SimplifySubscriptExprCheck.cpp",
diff --git a/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/TargetProcess/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/TargetProcess/BUILD.gn
index 937e81b..c4ce990 100644
--- a/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/TargetProcess/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/TargetProcess/BUILD.gn
@@ -12,8 +12,6 @@ static_library("TargetProcess") {
"JITLoaderGDB.cpp",
"JITLoaderPerf.cpp",
"JITLoaderVTune.cpp",
- "LibraryResolver.cpp",
- "LibraryScanner.cpp",
"OrcRTBootstrap.cpp",
"RegisterEHFrames.cpp",
"SimpleExecutorDylibManager.cpp",
diff --git a/llvm/utils/gn/secondary/llvm/lib/MC/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/MC/BUILD.gn
index 9d518df..98accc8 100644
--- a/llvm/utils/gn/secondary/llvm/lib/MC/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/MC/BUILD.gn
@@ -4,6 +4,7 @@ static_library("MC") {
"//llvm/include/llvm/Config:config",
"//llvm/lib/BinaryFormat",
"//llvm/lib/DebugInfo/CodeView",
+ "//llvm/lib/DebugInfo/DWARF/LowLevel",
"//llvm/lib/Support",
"//llvm/lib/TargetParser",
]
diff --git a/llvm/utils/gn/secondary/llvm/tools/llvm-remarkutil/BUILD.gn b/llvm/utils/gn/secondary/llvm/tools/llvm-remarkutil/BUILD.gn
index 2d02c15..d194f3d 100644
--- a/llvm/utils/gn/secondary/llvm/tools/llvm-remarkutil/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/tools/llvm-remarkutil/BUILD.gn
@@ -13,6 +13,7 @@ executable("llvm-remarkutil") {
"RemarkFilter.cpp",
"RemarkInstructionMix.cpp",
"RemarkSizeDiff.cpp",
+ "RemarkSummary.cpp",
"RemarkUtil.cpp",
"RemarkUtilHelpers.cpp",
"RemarkUtilRegistry.cpp",
diff --git a/llvm/utils/gn/secondary/llvm/unittests/ADT/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/ADT/BUILD.gn
index 8d19d30..1ba8d25 100644
--- a/llvm/utils/gn/secondary/llvm/unittests/ADT/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/unittests/ADT/BUILD.gn
@@ -71,6 +71,7 @@ unittest("ADTTests") {
"PointerUnionTest.cpp",
"PostOrderIteratorTest.cpp",
"PriorityWorklistTest.cpp",
+ "RadixTreeTest.cpp",
"RangeAdapterTest.cpp",
"RewriteBufferTest.cpp",
"SCCIteratorTest.cpp",
diff --git a/llvm/utils/gn/secondary/llvm/unittests/ExecutionEngine/Orc/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/ExecutionEngine/Orc/BUILD.gn
index 111e4c9..dfe6d6d 100644
--- a/llvm/utils/gn/secondary/llvm/unittests/ExecutionEngine/Orc/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/unittests/ExecutionEngine/Orc/BUILD.gn
@@ -24,7 +24,6 @@ unittest("OrcJITTests") {
"JITLinkRedirectionManagerTest.cpp",
"JITTargetMachineBuilderTest.cpp",
"LazyCallThroughAndReexportsTest.cpp",
- "LibraryResolverTest.cpp",
"LookupAndRecordAddrsTest.cpp",
"MachOPlatformTest.cpp",
"MapperJITLinkMemoryManagerTest.cpp",
diff --git a/llvm/utils/lit/lit/TestRunner.py b/llvm/utils/lit/lit/TestRunner.py
index f883145..9fba96a 100644
--- a/llvm/utils/lit/lit/TestRunner.py
+++ b/llvm/utils/lit/lit/TestRunner.py
@@ -945,7 +945,7 @@ def _executeShCmd(cmd, shenv, results, timeoutHelper):
path = (
cmd_shenv.env["PATH"] if "PATH" in cmd_shenv.env else shenv.env["PATH"]
)
- executable = lit.util.which(args[0], shenv.env["PATH"])
+ executable = lit.util.which(args[0], path)
if not executable:
raise InternalShellError(j, "%r: command not found" % args[0])
diff --git a/llvm/utils/lit/tests/Inputs/shtest-env-path/lit.cfg b/llvm/utils/lit/tests/Inputs/shtest-env-path/lit.cfg
new file mode 100644
index 0000000..36517f9
--- /dev/null
+++ b/llvm/utils/lit/tests/Inputs/shtest-env-path/lit.cfg
@@ -0,0 +1,8 @@
+import lit.formats
+
+config.name = "shtest-env-path"
+config.suffixes = [".txt"]
+config.test_format = lit.formats.ShTest()
+config.test_source_root = None
+config.test_exec_root = None
+config.substitutions.append(("%{python}", '"%s"' % (sys.executable)))
diff --git a/llvm/utils/lit/tests/Inputs/shtest-env-path/path.txt b/llvm/utils/lit/tests/Inputs/shtest-env-path/path.txt
new file mode 100644
index 0000000..b36e861
--- /dev/null
+++ b/llvm/utils/lit/tests/Inputs/shtest-env-path/path.txt
@@ -0,0 +1,8 @@
+## Tests env command for setting the PATH variable.
+
+## Check that test.sh can be found using the configured PATH.
+#
+# RUN: env PATH=%S test.sh | FileCheck --check-prefix=CHECK %s
+#
+
+# CHECK: TEST-ENV-PATH-123
diff --git a/llvm/utils/lit/tests/Inputs/shtest-env-path/test.sh b/llvm/utils/lit/tests/Inputs/shtest-env-path/test.sh
new file mode 100755
index 0000000..a1e46fc
--- /dev/null
+++ b/llvm/utils/lit/tests/Inputs/shtest-env-path/test.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+
+echo "TEST-ENV-PATH-123"
+
diff --git a/llvm/utils/lit/tests/shtest-env-path.py b/llvm/utils/lit/tests/shtest-env-path.py
new file mode 100644
index 0000000..bf459ae
--- /dev/null
+++ b/llvm/utils/lit/tests/shtest-env-path.py
@@ -0,0 +1,13 @@
+## Tests env command for setting the PATH variable.
+
+# The test uses /bin/sh. Limit it to systems known to have /bin/sh.
+# REQUIRES: system-linux
+
+# RUN: %{lit} -a -v %{inputs}/shtest-env-path/path.txt \
+# RUN: | FileCheck -match-full-lines %s
+#
+# END.
+
+# CHECK: -- Testing: 1 tests{{.*}}
+# CHECK: PASS: shtest-env-path :: path.txt (1 of 1)
+# CHECK: --
diff --git a/llvm/utils/profcheck-xfail.txt b/llvm/utils/profcheck-xfail.txt
index 3d07b16..aef7c09 100644
--- a/llvm/utils/profcheck-xfail.txt
+++ b/llvm/utils/profcheck-xfail.txt
@@ -550,6 +550,7 @@ tools/UpdateTestChecks/update_test_checks/stable_ir_values5.test
tools/UpdateTestChecks/update_test_checks/stable_ir_values6.test
tools/UpdateTestChecks/update_test_checks/stable_ir_values_funcs.test
tools/UpdateTestChecks/update_test_checks/stable_ir_values.test
+tools/UpdateTestChecks/update_test_checks/switch_case.test
tools/UpdateTestChecks/update_test_checks/tbaa-semantics-checks.test
tools/UpdateTestChecks/update_test_checks/various_ir_values_dbgrecords.test
Transforms/AtomicExpand/AArch64/atomicrmw-fp.ll
diff --git a/llvm/utils/update_test_checks.py b/llvm/utils/update_test_checks.py
index 3b562fb..42227b2 100755
--- a/llvm/utils/update_test_checks.py
+++ b/llvm/utils/update_test_checks.py
@@ -260,9 +260,17 @@ def update_test(ti: common.TestInfo):
skip_same_checks=dropped_previous_line,
):
# This input line of the function body will go as-is into the output.
- # Except make leading whitespace uniform: 2 spaces. 4 for debug records.
+ # Except make leading whitespace uniform: 2 spaces. 4 for debug records/switch cases.
indent = (
- " " if not common.IS_DEBUG_RECORD_RE.match(input_line) else " "
+ " " * 4
+ if (
+ common.IS_DEBUG_RECORD_RE.match(input_line)
+ or (
+ ti.args.version > 6
+ and common.IS_SWITCH_CASE_RE.match(input_line)
+ )
+ )
+ else " " * 2
)
input_line = common.SCRUB_LEADING_WHITESPACE_RE.sub(indent, input_line)
output_lines.append(input_line)