aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Gaëtan Bossu <gaetan.bossu@arm.com> 2025-08-01 11:32:58 +0000
committer: Gaëtan Bossu <gaetan.bossu@arm.com> 2025-08-01 16:43:33 +0000
commit: f573d2e983e34a2f99a37976d4956e7aa7c62acd (patch)
tree: dc5102ed171608943b1993eec7ab168b588bb868
parent: ebcb4929004ae3f08b2ca3d5d246f29aa73600e1 (diff)
download: llvm-users/gbossu.vector.extract.2.zip
llvm-users/gbossu.vector.extract.2.tar.gz
llvm-users/gbossu.vector.extract.2.tar.bz2
[AArch64][ISel] Select constructive SVE2 ext instruction (branch: users/gbossu.vector.extract.2)
This adds patterns for selecting EXT_ZZI_B, the constructive SVE2 form of EXT that reads its two source vectors from a ZPR2 register tuple. The patterns are tested for fixed-length vectors using extract shuffles, and for scalable vectors using llvm.vector.splice intrinsics. Codegen will improve once subreg liveness is enabled: without it, any use of a ZPR2 tuple is always considered to use both ZPR registers of the pair.
-rw-r--r-- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 16
-rw-r--r-- llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll | 10
-rw-r--r-- llvm/test/CodeGen/AArch64/sve-fixed-length-partial-reduce.ll | 91
-rw-r--r-- llvm/test/CodeGen/AArch64/sve-pr92779.ll | 17
-rw-r--r-- llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll | 24
-rw-r--r-- llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll | 32
-rw-r--r-- llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll | 32
-rw-r--r-- llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll | 504
-rw-r--r-- llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll | 1826
-rw-r--r-- llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll | 624
-rw-r--r-- llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll | 22
-rw-r--r-- llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reductions.ll | 96
-rw-r--r-- llvm/test/CodeGen/AArch64/sve-vector-splice.ll | 253
-rw-r--r-- llvm/test/CodeGen/AArch64/sve2-fixed-length-extract-subvector.ll | 79
14 files changed, 2236 insertions, 1390 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 0c4b4f4..201dd93 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4069,6 +4069,22 @@ let Predicates = [HasSVE2_or_SME] in {
let AddedComplexity = 2 in {
def : Pat<(nxv16i8 (AArch64ext nxv16i8:$zn1, nxv16i8:$zn2, (i32 imm0_255:$imm))),
(EXT_ZZI_B (REG_SEQUENCE ZPR2, $zn1, zsub0, $zn2, zsub1), imm0_255:$imm)>;
+
+ foreach VT = [nxv16i8] in
+ def : Pat<(VT (vector_splice VT:$Z1, VT:$Z2, (i64 (sve_ext_imm_0_255 i32:$index)))),
+ (EXT_ZZI_B (REG_SEQUENCE ZPR2, $Z1, zsub0, $Z2, zsub1), imm0_255:$index)>;
+
+ foreach VT = [nxv8i16, nxv8f16, nxv8bf16] in
+ def : Pat<(VT (vector_splice VT:$Z1, VT:$Z2, (i64 (sve_ext_imm_0_127 i32:$index)))),
+ (EXT_ZZI_B (REG_SEQUENCE ZPR2, $Z1, zsub0, $Z2, zsub1), imm0_255:$index)>;
+
+ foreach VT = [nxv4i32, nxv4f16, nxv4f32, nxv4bf16] in
+ def : Pat<(VT (vector_splice VT:$Z1, VT:$Z2, (i64 (sve_ext_imm_0_63 i32:$index)))),
+ (EXT_ZZI_B (REG_SEQUENCE ZPR2, $Z1, zsub0, $Z2, zsub1), imm0_255:$index)>;
+
+ foreach VT = [nxv2i64, nxv2f16, nxv2f32, nxv2f64, nxv2bf16] in
+ def : Pat<(VT (vector_splice VT:$Z1, VT:$Z2, (i64 (sve_ext_imm_0_31 i32:$index)))),
+ (EXT_ZZI_B (REG_SEQUENCE ZPR2, $Z1, zsub0, $Z2, zsub1), imm0_255:$index)>;
}
} // End HasSVE2_or_SME
diff --git a/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll b/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll
index 50975d1..13bec60 100644
--- a/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll
+++ b/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll
@@ -192,7 +192,7 @@ define void @test_fixed_extract(i64 %i, i64 %n) #0 {
; CHECK-SVE2p1-NEXT: mov z1.s, p0/z, #1 // =0x1
; CHECK-SVE2p1-NEXT: fmov s0, w8
; CHECK-SVE2p1-NEXT: mov v0.s[1], v1.s[1]
-; CHECK-SVE2p1-NEXT: ext z1.b, z1.b, z0.b, #8
+; CHECK-SVE2p1-NEXT: ext z1.b, { z1.b, z2.b }, #8
; CHECK-SVE2p1-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SVE2p1-NEXT: // kill: def $d1 killed $d1 killed $z1
; CHECK-SVE2p1-NEXT: b use
@@ -202,12 +202,12 @@ define void @test_fixed_extract(i64 %i, i64 %n) #0 {
; CHECK-SME2-NEXT: whilelo p0.s, x0, x1
; CHECK-SME2-NEXT: cset w8, mi
; CHECK-SME2-NEXT: mov z1.s, p0/z, #1 // =0x1
-; CHECK-SME2-NEXT: fmov s2, w8
+; CHECK-SME2-NEXT: fmov s3, w8
; CHECK-SME2-NEXT: mov z0.s, z1.s[1]
-; CHECK-SME2-NEXT: zip1 z0.s, z2.s, z0.s
-; CHECK-SME2-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-SME2-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-SME2-NEXT: ext z1.b, { z1.b, z2.b }, #8
; CHECK-SME2-NEXT: // kill: def $d1 killed $d1 killed $z1
+; CHECK-SME2-NEXT: zip1 z0.s, z3.s, z0.s
+; CHECK-SME2-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-SME2-NEXT: b use
%r = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 %i, i64 %n)
%v0 = call <2 x i1> @llvm.vector.extract.v2i1.nxv4i1.i64(<vscale x 4 x i1> %r, i64 0)
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-partial-reduce.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-partial-reduce.ll
index 33d5ac4..3e8b3a4 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-partial-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-partial-reduce.ll
@@ -109,14 +109,13 @@ define <16 x i16> @two_way_i8_i16_vl256(ptr %accptr, ptr %uptr, ptr %sptr) vscal
; SME-LABEL: two_way_i8_i16_vl256:
; SME: // %bb.0:
; SME-NEXT: ldr z0, [x0]
-; SME-NEXT: ldr z1, [x1]
-; SME-NEXT: ldr z2, [x2]
-; SME-NEXT: umlalb z0.h, z2.b, z1.b
-; SME-NEXT: umlalt z0.h, z2.b, z1.b
-; SME-NEXT: mov z1.d, z0.d
-; SME-NEXT: ext z1.b, z1.b, z0.b, #16
-; SME-NEXT: // kill: def $q0 killed $q0 killed $z0
-; SME-NEXT: // kill: def $q1 killed $q1 killed $z1
+; SME-NEXT: ldr z2, [x1]
+; SME-NEXT: ldr z3, [x2]
+; SME-NEXT: umlalb z0.h, z3.b, z2.b
+; SME-NEXT: umlalt z0.h, z3.b, z2.b
+; SME-NEXT: ext z2.b, { z0.b, z1.b }, #16
+; SME-NEXT: // kill: def $q0 killed $q0 killed $z0_z1
+; SME-NEXT: mov z1.d, z2.d
; SME-NEXT: ret
%acc = load <16 x i16>, ptr %accptr
%u = load <32 x i8>, ptr %uptr
@@ -232,14 +231,13 @@ define <8 x i32> @two_way_i16_i32_vl256(ptr %accptr, ptr %uptr, ptr %sptr) vscal
; SME-LABEL: two_way_i16_i32_vl256:
; SME: // %bb.0:
; SME-NEXT: ldr z0, [x0]
-; SME-NEXT: ldr z1, [x1]
-; SME-NEXT: ldr z2, [x2]
-; SME-NEXT: umlalb z0.s, z2.h, z1.h
-; SME-NEXT: umlalt z0.s, z2.h, z1.h
-; SME-NEXT: mov z1.d, z0.d
-; SME-NEXT: ext z1.b, z1.b, z0.b, #16
-; SME-NEXT: // kill: def $q0 killed $q0 killed $z0
-; SME-NEXT: // kill: def $q1 killed $q1 killed $z1
+; SME-NEXT: ldr z2, [x1]
+; SME-NEXT: ldr z3, [x2]
+; SME-NEXT: umlalb z0.s, z3.h, z2.h
+; SME-NEXT: umlalt z0.s, z3.h, z2.h
+; SME-NEXT: ext z2.b, { z0.b, z1.b }, #16
+; SME-NEXT: // kill: def $q0 killed $q0 killed $z0_z1
+; SME-NEXT: mov z1.d, z2.d
; SME-NEXT: ret
%acc = load <8 x i32>, ptr %accptr
%u = load <16 x i16>, ptr %uptr
@@ -355,14 +353,13 @@ define <4 x i64> @two_way_i32_i64_vl256(ptr %accptr, ptr %uptr, ptr %sptr) vscal
; SME-LABEL: two_way_i32_i64_vl256:
; SME: // %bb.0:
; SME-NEXT: ldr z0, [x0]
-; SME-NEXT: ldr z1, [x1]
-; SME-NEXT: ldr z2, [x2]
-; SME-NEXT: umlalb z0.d, z2.s, z1.s
-; SME-NEXT: umlalt z0.d, z2.s, z1.s
-; SME-NEXT: mov z1.d, z0.d
-; SME-NEXT: ext z1.b, z1.b, z0.b, #16
-; SME-NEXT: // kill: def $q0 killed $q0 killed $z0
-; SME-NEXT: // kill: def $q1 killed $q1 killed $z1
+; SME-NEXT: ldr z2, [x1]
+; SME-NEXT: ldr z3, [x2]
+; SME-NEXT: umlalb z0.d, z3.s, z2.s
+; SME-NEXT: umlalt z0.d, z3.s, z2.s
+; SME-NEXT: ext z2.b, { z0.b, z1.b }, #16
+; SME-NEXT: // kill: def $q0 killed $q0 killed $z0_z1
+; SME-NEXT: mov z1.d, z2.d
; SME-NEXT: ret
%acc = load <4 x i64>, ptr %accptr
%u = load <8 x i32>, ptr %uptr
@@ -644,13 +641,12 @@ define <8 x i32> @four_way_i8_i32_vl256(ptr %accptr, ptr %uptr, ptr %sptr) vscal
; SME-LABEL: four_way_i8_i32_vl256:
; SME: // %bb.0:
; SME-NEXT: ldr z0, [x0]
-; SME-NEXT: ldr z1, [x1]
-; SME-NEXT: ldr z2, [x2]
-; SME-NEXT: udot z0.s, z2.b, z1.b
-; SME-NEXT: mov z1.d, z0.d
-; SME-NEXT: ext z1.b, z1.b, z0.b, #16
-; SME-NEXT: // kill: def $q0 killed $q0 killed $z0
-; SME-NEXT: // kill: def $q1 killed $q1 killed $z1
+; SME-NEXT: ldr z2, [x1]
+; SME-NEXT: ldr z3, [x2]
+; SME-NEXT: udot z0.s, z3.b, z2.b
+; SME-NEXT: ext z2.b, { z0.b, z1.b }, #16
+; SME-NEXT: // kill: def $q0 killed $q0 killed $z0_z1
+; SME-NEXT: mov z1.d, z2.d
; SME-NEXT: ret
%acc = load <8 x i32>, ptr %accptr
%u = load <32 x i8>, ptr %uptr
@@ -689,13 +685,12 @@ define <8 x i32> @four_way_i8_i32_vl256_usdot(ptr %accptr, ptr %uptr, ptr %sptr)
; SME-LABEL: four_way_i8_i32_vl256_usdot:
; SME: // %bb.0:
; SME-NEXT: ldr z0, [x0]
-; SME-NEXT: ldr z1, [x1]
-; SME-NEXT: ldr z2, [x2]
-; SME-NEXT: usdot z0.s, z1.b, z2.b
-; SME-NEXT: mov z1.d, z0.d
-; SME-NEXT: ext z1.b, z1.b, z0.b, #16
-; SME-NEXT: // kill: def $q0 killed $q0 killed $z0
-; SME-NEXT: // kill: def $q1 killed $q1 killed $z1
+; SME-NEXT: ldr z2, [x1]
+; SME-NEXT: ldr z3, [x2]
+; SME-NEXT: usdot z0.s, z2.b, z3.b
+; SME-NEXT: ext z2.b, { z0.b, z1.b }, #16
+; SME-NEXT: // kill: def $q0 killed $q0 killed $z0_z1
+; SME-NEXT: mov z1.d, z2.d
; SME-NEXT: ret
%acc = load <8 x i32>, ptr %accptr
%u = load <32 x i8>, ptr %uptr
@@ -822,13 +817,12 @@ define <4 x i64> @four_way_i16_i64_vl256(ptr %accptr, ptr %uptr, ptr %sptr) vsca
; SME-LABEL: four_way_i16_i64_vl256:
; SME: // %bb.0:
; SME-NEXT: ldr z0, [x0]
-; SME-NEXT: ldr z1, [x1]
-; SME-NEXT: ldr z2, [x2]
-; SME-NEXT: udot z0.d, z2.h, z1.h
-; SME-NEXT: mov z1.d, z0.d
-; SME-NEXT: ext z1.b, z1.b, z0.b, #16
-; SME-NEXT: // kill: def $q0 killed $q0 killed $z0
-; SME-NEXT: // kill: def $q1 killed $q1 killed $z1
+; SME-NEXT: ldr z2, [x1]
+; SME-NEXT: ldr z3, [x2]
+; SME-NEXT: udot z0.d, z3.h, z2.h
+; SME-NEXT: ext z2.b, { z0.b, z1.b }, #16
+; SME-NEXT: // kill: def $q0 killed $q0 killed $z0_z1
+; SME-NEXT: mov z1.d, z2.d
; SME-NEXT: ret
%acc = load <4 x i64>, ptr %accptr
%u = load <16 x i16>, ptr %uptr
@@ -999,10 +993,9 @@ define <4 x i64> @four_way_i8_i64_vl256(ptr %accptr, ptr %uptr, ptr %sptr) vscal
; SME-NEXT: ldr z0, [x0]
; SME-NEXT: uaddwb z0.d, z0.d, z2.s
; SME-NEXT: uaddwt z0.d, z0.d, z2.s
-; SME-NEXT: mov z1.d, z0.d
-; SME-NEXT: ext z1.b, z1.b, z0.b, #16
-; SME-NEXT: // kill: def $q0 killed $q0 killed $z0
-; SME-NEXT: // kill: def $q1 killed $q1 killed $z1
+; SME-NEXT: ext z2.b, { z0.b, z1.b }, #16
+; SME-NEXT: // kill: def $q0 killed $q0 killed $z0_z1
+; SME-NEXT: mov z1.d, z2.d
; SME-NEXT: ret
%acc = load <4 x i64>, ptr %accptr
%u = load <32 x i8>, ptr %uptr
diff --git a/llvm/test/CodeGen/AArch64/sve-pr92779.ll b/llvm/test/CodeGen/AArch64/sve-pr92779.ll
index 3f34d79..427d390 100644
--- a/llvm/test/CodeGen/AArch64/sve-pr92779.ll
+++ b/llvm/test/CodeGen/AArch64/sve-pr92779.ll
@@ -5,16 +5,15 @@ define void @main(ptr %0) {
; CHECK-LABEL: main:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NEXT: movi v1.2d, #0000000000000000
; CHECK-NEXT: ptrue p0.d, vl1
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: uzp1 v0.2s, v1.2s, v0.2s
-; CHECK-NEXT: neg v0.2s, v0.2s
-; CHECK-NEXT: smov x8, v0.s[0]
-; CHECK-NEXT: smov x9, v0.s[1]
-; CHECK-NEXT: mov z1.d, p0/m, x8
-; CHECK-NEXT: mov z1.d, p0/m, x9
-; CHECK-NEXT: str z1, [x0]
+; CHECK-NEXT: ext z2.b, { z0.b, z1.b }, #8
+; CHECK-NEXT: uzp1 v2.2s, v0.2s, v2.2s
+; CHECK-NEXT: neg v2.2s, v2.2s
+; CHECK-NEXT: smov x8, v2.s[0]
+; CHECK-NEXT: smov x9, v2.s[1]
+; CHECK-NEXT: mov z0.d, p0/m, x8
+; CHECK-NEXT: mov z0.d, p0/m, x9
+; CHECK-NEXT: str z0, [x0]
; CHECK-NEXT: ret
"entry":
%1 = bitcast <vscale x 2 x i64> zeroinitializer to <vscale x 4 x i32>
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
index 4d524bc..6fe6b8a 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
@@ -228,25 +228,25 @@ define <4 x i256> @load_sext_v4i32i256(ptr %ap) {
; CHECK-LABEL: load_sext_v4i32i256:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: sunpklo z1.d, z0.s
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT: sunpklo z2.d, z0.s
+; CHECK-NEXT: ext z0.b, { z0.b, z1.b }, #8
; CHECK-NEXT: sunpklo z0.d, z0.s
-; CHECK-NEXT: fmov x9, d1
-; CHECK-NEXT: mov z1.d, z1.d[1]
-; CHECK-NEXT: fmov x11, d0
-; CHECK-NEXT: mov z0.d, z0.d[1]
+; CHECK-NEXT: fmov x9, d2
+; CHECK-NEXT: mov z2.d, z2.d[1]
; CHECK-NEXT: asr x10, x9, #63
+; CHECK-NEXT: fmov x11, d2
; CHECK-NEXT: stp x9, x10, [x8]
-; CHECK-NEXT: fmov x9, d1
+; CHECK-NEXT: fmov x9, d0
+; CHECK-NEXT: mov z0.d, z0.d[1]
; CHECK-NEXT: asr x12, x11, #63
; CHECK-NEXT: stp x10, x10, [x8, #16]
-; CHECK-NEXT: stp x11, x12, [x8, #64]
+; CHECK-NEXT: stp x11, x12, [x8, #32]
; CHECK-NEXT: fmov x11, d0
; CHECK-NEXT: asr x10, x9, #63
-; CHECK-NEXT: stp x12, x12, [x8, #80]
-; CHECK-NEXT: stp x10, x10, [x8, #48]
+; CHECK-NEXT: stp x12, x12, [x8, #48]
+; CHECK-NEXT: stp x10, x10, [x8, #80]
; CHECK-NEXT: asr x12, x11, #63
-; CHECK-NEXT: stp x9, x10, [x8, #32]
+; CHECK-NEXT: stp x9, x10, [x8, #64]
; CHECK-NEXT: stp x12, x12, [x8, #112]
; CHECK-NEXT: stp x11, x12, [x8, #96]
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
index 35dd827..7ef35f1 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
@@ -78,8 +78,8 @@ define <4 x i8> @extract_subvector_v8i8(<8 x i8> %op) {
define <8 x i8> @extract_subvector_v16i8(<16 x i8> %op) {
; CHECK-LABEL: extract_subvector_v16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0_z1
+; CHECK-NEXT: ext z0.b, { z0.b, z1.b }, #8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
@@ -119,7 +119,7 @@ define <2 x i16> @extract_subvector_v4i16(<4 x i16> %op) {
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT: ext z0.b, { z0.b, z1.b }, #8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
@@ -138,8 +138,8 @@ define <2 x i16> @extract_subvector_v4i16(<4 x i16> %op) {
define <4 x i16> @extract_subvector_v8i16(<8 x i16> %op) {
; CHECK-LABEL: extract_subvector_v8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0_z1
+; CHECK-NEXT: ext z0.b, { z0.b, z1.b }, #8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
@@ -198,8 +198,8 @@ define <1 x i32> @extract_subvector_v2i32(<2 x i32> %op) {
define <2 x i32> @extract_subvector_v4i32(<4 x i32> %op) {
; CHECK-LABEL: extract_subvector_v4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0_z1
+; CHECK-NEXT: ext z0.b, { z0.b, z1.b }, #8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
@@ -237,8 +237,8 @@ define void @extract_subvector_v8i32(ptr %a, ptr %b) {
define <1 x i64> @extract_subvector_v2i64(<2 x i64> %op) {
; CHECK-LABEL: extract_subvector_v2i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0_z1
+; CHECK-NEXT: ext z0.b, { z0.b, z1.b }, #8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
@@ -297,8 +297,8 @@ define <2 x half> @extract_subvector_v4f16(<4 x half> %op) {
define <4 x half> @extract_subvector_v8f16(<8 x half> %op) {
; CHECK-LABEL: extract_subvector_v8f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0_z1
+; CHECK-NEXT: ext z0.b, { z0.b, z1.b }, #8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
@@ -357,8 +357,8 @@ define <1 x float> @extract_subvector_v2f32(<2 x float> %op) {
define <2 x float> @extract_subvector_v4f32(<4 x float> %op) {
; CHECK-LABEL: extract_subvector_v4f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0_z1
+; CHECK-NEXT: ext z0.b, { z0.b, z1.b }, #8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
@@ -396,8 +396,8 @@ define void @extract_subvector_v8f32(ptr %a, ptr %b) {
define <1 x double> @extract_subvector_v2f64(<2 x double> %op) {
; CHECK-LABEL: extract_subvector_v2f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0_z1
+; CHECK-NEXT: ext z0.b, { z0.b, z1.b }, #8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
index e3d0a72..bc9b037 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
@@ -74,14 +74,14 @@ define void @fcvt_v4f16_to_v4f32(<4 x half> %a, ptr %b) {
define void @fcvt_v8f16_to_v8f32(<8 x half> %a, ptr %b) {
; CHECK-LABEL: fcvt_v8f16_to_v8f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: uunpklo z1.s, z0.h
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0_z1
+; CHECK-NEXT: ext z2.b, { z0.b, z1.b }, #8
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: fcvt z1.s, p0/m, z1.h
+; CHECK-NEXT: uunpklo z1.s, z2.h
; CHECK-NEXT: fcvt z0.s, p0/m, z0.h
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: fcvt z1.s, p0/m, z1.h
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fcvt_v8f16_to_v8f32:
@@ -122,21 +122,21 @@ define void @fcvt_v8f16_to_v8f32(<8 x half> %a, ptr %b) {
define void @fcvt_v16f16_to_v16f32(<16 x half> %a, ptr %b) {
; CHECK-LABEL: fcvt_v16f16_to_v16f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: uunpklo z2.s, z1.h
-; CHECK-NEXT: uunpklo z3.s, z0.h
+; CHECK-NEXT: mov z3.d, z0.d
+; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1_z2
+; CHECK-NEXT: ext z0.b, { z1.b, z2.b }, #8
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
; CHECK-NEXT: uunpklo z1.s, z1.h
+; CHECK-NEXT: ext z5.b, { z3.b, z4.b }, #8
; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: fcvt z2.s, p0/m, z2.h
-; CHECK-NEXT: fcvt z3.s, p0/m, z3.h
+; CHECK-NEXT: uunpklo z2.s, z3.h
; CHECK-NEXT: fcvt z1.s, p0/m, z1.h
+; CHECK-NEXT: uunpklo z3.s, z5.h
; CHECK-NEXT: fcvt z0.s, p0/m, z0.h
-; CHECK-NEXT: stp q3, q0, [x0]
-; CHECK-NEXT: stp q2, q1, [x0, #32]
+; CHECK-NEXT: fcvt z2.s, p0/m, z2.h
+; CHECK-NEXT: fcvt z3.s, p0/m, z3.h
+; CHECK-NEXT: stp q1, q0, [x0, #32]
+; CHECK-NEXT: stp q2, q3, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fcvt_v16f16_to_v16f32:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
index ae7c676..0e34b2c 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
@@ -58,21 +58,21 @@ define <8 x i8> @sdiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: sunpklo z3.h, z0.b
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: sunpklo z1.h, z1.b
-; CHECK-NEXT: sunpklo z0.h, z0.b
-; CHECK-NEXT: sunpklo z2.s, z1.h
-; CHECK-NEXT: sunpklo z3.s, z0.h
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT: sunpklo z5.s, z3.h
+; CHECK-NEXT: sunpklo z0.s, z1.h
+; CHECK-NEXT: ext z1.b, { z1.b, z2.b }, #8
+; CHECK-NEXT: ext z2.b, { z3.b, z4.b }, #8
; CHECK-NEXT: sunpklo z1.s, z1.h
-; CHECK-NEXT: sunpklo z0.s, z0.h
-; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: sunpklo z2.s, z2.h
+; CHECK-NEXT: sdivr z0.s, p0/m, z0.s, z5.s
+; CHECK-NEXT: sdivr z1.s, p0/m, z1.s, z2.s
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h
; CHECK-NEXT: uzp1 z2.h, z0.h, z0.h
-; CHECK-NEXT: splice z0.h, p0, { z1.h, z2.h }
+; CHECK-NEXT: uzp1 z3.h, z1.h, z1.h
+; CHECK-NEXT: splice z0.h, p0, { z2.h, z3.h }
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -124,40 +124,40 @@ define <8 x i8> @sdiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
define <16 x i8> @sdiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; CHECK-LABEL: sdiv_v16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: sunpklo z2.h, z1.b
-; CHECK-NEXT: sunpklo z3.h, z0.b
+; CHECK-NEXT: mov z3.d, z0.d
+; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1_z2
+; CHECK-NEXT: sunpklo z5.h, z1.b
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: sunpklo z4.s, z2.h
-; CHECK-NEXT: sunpklo z5.s, z3.h
-; CHECK-NEXT: ext z2.b, z2.b, z0.b, #8
-; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: sunpklo z1.h, z1.b
-; CHECK-NEXT: sunpklo z2.s, z2.h
-; CHECK-NEXT: sunpklo z3.s, z3.h
-; CHECK-NEXT: sunpklo z0.h, z0.b
-; CHECK-NEXT: sdivr z4.s, p0/m, z4.s, z5.s
-; CHECK-NEXT: sunpklo z5.s, z0.h
-; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT: sunpklo z3.s, z1.h
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT: ext z1.b, { z1.b, z2.b }, #8
+; CHECK-NEXT: sunpklo z16.h, z3.b
+; CHECK-NEXT: ext z2.b, { z3.b, z4.b }, #8
+; CHECK-NEXT: sunpklo z0.s, z5.h
+; CHECK-NEXT: ext z5.b, { z5.b, z6.b }, #8
+; CHECK-NEXT: sunpklo z3.h, z1.b
+; CHECK-NEXT: sunpklo z1.h, z2.b
+; CHECK-NEXT: ext z6.b, { z16.b, z17.b }, #8
+; CHECK-NEXT: sunpklo z7.s, z16.h
+; CHECK-NEXT: sunpklo z5.s, z5.h
+; CHECK-NEXT: sunpklo z6.s, z6.h
+; CHECK-NEXT: sdivr z0.s, p0/m, z0.s, z7.s
+; CHECK-NEXT: sunpklo z7.s, z1.h
+; CHECK-NEXT: ext z1.b, { z1.b, z2.b }, #8
; CHECK-NEXT: sunpklo z1.s, z1.h
-; CHECK-NEXT: sunpklo z0.s, z0.h
-; CHECK-NEXT: sdivr z3.s, p0/m, z3.s, z5.s
-; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
-; CHECK-NEXT: uzp1 z5.h, z2.h, z2.h
-; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z1.h, z3.h, z3.h
+; CHECK-NEXT: sdivr z5.s, p0/m, z5.s, z6.s
+; CHECK-NEXT: sunpklo z6.s, z3.h
+; CHECK-NEXT: ext z3.b, { z3.b, z4.b }, #8
+; CHECK-NEXT: sunpklo z2.s, z3.h
+; CHECK-NEXT: sdivr z6.s, p0/m, z6.s, z7.s
+; CHECK-NEXT: sdiv z1.s, p0/m, z1.s, z2.s
; CHECK-NEXT: uzp1 z2.h, z0.h, z0.h
-; CHECK-NEXT: splice z0.h, p0, { z4.h, z5.h }
-; CHECK-NEXT: splice z1.h, p0, { z1.h, z2.h }
-; CHECK-NEXT: ptrue p0.b, vl8
+; CHECK-NEXT: ptrue p0.h, vl4
+; CHECK-NEXT: uzp1 z3.h, z5.h, z5.h
+; CHECK-NEXT: uzp1 z4.h, z6.h, z6.h
+; CHECK-NEXT: splice z0.h, p0, { z2.h, z3.h }
; CHECK-NEXT: uzp1 z2.b, z0.b, z0.b
+; CHECK-NEXT: uzp1 z5.h, z1.h, z1.h
+; CHECK-NEXT: splice z1.h, p0, { z4.h, z5.h }
+; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: uzp1 z3.b, z1.b, z1.b
; CHECK-NEXT: splice z0.b, p0, { z2.b, z3.b }
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -241,73 +241,73 @@ define <16 x i8> @sdiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
define void @sdiv_v32i8(ptr %a, ptr %b) {
; CHECK-LABEL: sdiv_v32i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q6, q3, [x1]
+; CHECK-NEXT: ldp q18, q4, [x1]
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: ldr q2, [x0, #16]
-; CHECK-NEXT: sunpklo z1.h, z3.b
-; CHECK-NEXT: sunpklo z4.h, z2.b
-; CHECK-NEXT: sunpklo z7.h, z6.b
-; CHECK-NEXT: sunpklo z0.s, z1.h
-; CHECK-NEXT: sunpklo z5.s, z4.h
-; CHECK-NEXT: sunpklo z17.s, z7.h
-; CHECK-NEXT: sdivr z0.s, p0/m, z0.s, z5.s
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: ext z4.b, z4.b, z0.b, #8
-; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT: ext z2.b, z2.b, z0.b, #8
-; CHECK-NEXT: ext z7.b, z7.b, z0.b, #8
-; CHECK-NEXT: ext z6.b, z6.b, z0.b, #8
+; CHECK-NEXT: sunpklo z6.h, z4.b
+; CHECK-NEXT: sunpklo z16.h, z2.b
+; CHECK-NEXT: ext z4.b, { z4.b, z5.b }, #8
+; CHECK-NEXT: ext z2.b, { z2.b, z3.b }, #8
+; CHECK-NEXT: sunpklo z20.h, z18.b
+; CHECK-NEXT: ext z18.b, { z18.b, z19.b }, #8
+; CHECK-NEXT: sunpklo z3.h, z4.b
+; CHECK-NEXT: sunpklo z0.s, z6.h
+; CHECK-NEXT: sunpklo z1.s, z16.h
+; CHECK-NEXT: sdivr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ext z1.b, { z6.b, z7.b }, #8
+; CHECK-NEXT: ext z6.b, { z16.b, z17.b }, #8
+; CHECK-NEXT: ldr q16, [x0]
; CHECK-NEXT: sunpklo z1.s, z1.h
-; CHECK-NEXT: sunpklo z4.s, z4.h
-; CHECK-NEXT: sunpklo z3.h, z3.b
-; CHECK-NEXT: sunpklo z7.s, z7.h
-; CHECK-NEXT: sunpklo z6.h, z6.b
-; CHECK-NEXT: sdivr z1.s, p0/m, z1.s, z4.s
-; CHECK-NEXT: sunpklo z4.h, z2.b
+; CHECK-NEXT: sunpklo z6.s, z6.h
+; CHECK-NEXT: sunpklo z22.h, z16.b
+; CHECK-NEXT: ext z16.b, { z16.b, z17.b }, #8
+; CHECK-NEXT: sunpklo z17.h, z18.b
+; CHECK-NEXT: sunpklo z24.s, z22.h
+; CHECK-NEXT: sdivr z1.s, p0/m, z1.s, z6.s
+; CHECK-NEXT: sunpklo z5.h, z2.b
; CHECK-NEXT: sunpklo z2.s, z3.h
-; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT: sunpklo z5.s, z4.h
-; CHECK-NEXT: ext z4.b, z4.b, z0.b, #8
+; CHECK-NEXT: ext z3.b, { z3.b, z4.b }, #8
+; CHECK-NEXT: sunpklo z7.s, z5.h
+; CHECK-NEXT: ext z4.b, { z5.b, z6.b }, #8
; CHECK-NEXT: sunpklo z3.s, z3.h
; CHECK-NEXT: sunpklo z4.s, z4.h
-; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z5.s
-; CHECK-NEXT: ldr q5, [x0]
-; CHECK-NEXT: sunpklo z16.h, z5.b
-; CHECK-NEXT: ext z5.b, z5.b, z0.b, #8
-; CHECK-NEXT: sunpklo z5.h, z5.b
-; CHECK-NEXT: sunpklo z18.s, z16.h
-; CHECK-NEXT: ext z16.b, z16.b, z0.b, #8
-; CHECK-NEXT: sunpklo z16.s, z16.h
+; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z7.s
+; CHECK-NEXT: sunpklo z7.s, z20.h
+; CHECK-NEXT: ext z20.b, { z20.b, z21.b }, #8
+; CHECK-NEXT: ext z21.b, { z22.b, z23.b }, #8
+; CHECK-NEXT: sunpklo z20.s, z20.h
+; CHECK-NEXT: sunpklo z21.s, z21.h
+; CHECK-NEXT: sdivr z20.s, p0/m, z20.s, z21.s
+; CHECK-NEXT: sunpklo z21.h, z16.b
+; CHECK-NEXT: sunpklo z16.s, z17.h
+; CHECK-NEXT: ext z17.b, { z17.b, z18.b }, #8
+; CHECK-NEXT: ext z18.b, { z21.b, z22.b }, #8
+; CHECK-NEXT: sunpklo z19.s, z21.h
+; CHECK-NEXT: sunpklo z17.s, z17.h
+; CHECK-NEXT: sunpklo z18.s, z18.h
+; CHECK-NEXT: sdivr z7.s, p0/m, z7.s, z24.s
+; CHECK-NEXT: sdivr z16.s, p0/m, z16.s, z19.s
+; CHECK-NEXT: uzp1 z6.h, z7.h, z7.h
+; CHECK-NEXT: uzp1 z7.h, z20.h, z20.h
; CHECK-NEXT: sdivr z17.s, p0/m, z17.s, z18.s
-; CHECK-NEXT: sunpklo z18.s, z5.h
-; CHECK-NEXT: ext z5.b, z5.b, z0.b, #8
-; CHECK-NEXT: sunpklo z5.s, z5.h
-; CHECK-NEXT: sdivr z7.s, p0/m, z7.s, z16.s
-; CHECK-NEXT: sunpklo z16.s, z6.h
-; CHECK-NEXT: ext z6.b, z6.b, z0.b, #8
-; CHECK-NEXT: sunpklo z6.s, z6.h
-; CHECK-NEXT: uzp1 z20.h, z17.h, z17.h
-; CHECK-NEXT: sdivr z16.s, p0/m, z16.s, z18.s
-; CHECK-NEXT: uzp1 z18.h, z0.h, z0.h
-; CHECK-NEXT: uzp1 z19.h, z1.h, z1.h
-; CHECK-NEXT: uzp1 z21.h, z7.h, z7.h
-; CHECK-NEXT: sdiv z5.s, p0/m, z5.s, z6.s
-; CHECK-NEXT: uzp1 z0.h, z16.h, z16.h
+; CHECK-NEXT: uzp1 z18.h, z2.h, z2.h
; CHECK-NEXT: sdivr z3.s, p0/m, z3.s, z4.s
+; CHECK-NEXT: uzp1 z4.h, z0.h, z0.h
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z1.h, z5.h, z5.h
-; CHECK-NEXT: uzp1 z4.h, z2.h, z2.h
-; CHECK-NEXT: splice z2.h, p0, { z20.h, z21.h }
+; CHECK-NEXT: uzp1 z5.h, z1.h, z1.h
+; CHECK-NEXT: uzp1 z0.h, z16.h, z16.h
+; CHECK-NEXT: splice z2.h, p0, { z6.h, z7.h }
+; CHECK-NEXT: uzp1 z1.h, z17.h, z17.h
; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h }
-; CHECK-NEXT: uzp1 z5.h, z3.h, z3.h
-; CHECK-NEXT: splice z3.h, p0, { z18.h, z19.h }
-; CHECK-NEXT: splice z1.h, p0, { z4.h, z5.h }
+; CHECK-NEXT: uzp1 z19.h, z3.h, z3.h
+; CHECK-NEXT: splice z3.h, p0, { z4.h, z5.h }
; CHECK-NEXT: uzp1 z4.b, z2.b, z2.b
+; CHECK-NEXT: uzp1 z5.b, z0.b, z0.b
+; CHECK-NEXT: splice z1.h, p0, { z18.h, z19.h }
; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: uzp1 z2.b, z3.b, z3.b
-; CHECK-NEXT: uzp1 z5.b, z0.b, z0.b
-; CHECK-NEXT: uzp1 z3.b, z1.b, z1.b
; CHECK-NEXT: splice z0.b, p0, { z4.b, z5.b }
+; CHECK-NEXT: uzp1 z3.b, z1.b, z1.b
; CHECK-NEXT: splice z1.b, p0, { z2.b, z3.b }
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
@@ -534,21 +534,21 @@ define <4 x i16> @sdiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
define <8 x i16> @sdiv_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
; CHECK-LABEL: sdiv_v8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: sunpklo z2.s, z1.h
-; CHECK-NEXT: sunpklo z3.s, z0.h
+; CHECK-NEXT: mov z3.d, z0.d
+; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1_z2
+; CHECK-NEXT: sunpklo z0.s, z1.h
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT: ext z1.b, { z1.b, z2.b }, #8
+; CHECK-NEXT: ext z2.b, { z3.b, z4.b }, #8
+; CHECK-NEXT: sunpklo z5.s, z3.h
; CHECK-NEXT: sunpklo z1.s, z1.h
-; CHECK-NEXT: sunpklo z0.s, z0.h
-; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: sunpklo z2.s, z2.h
+; CHECK-NEXT: sdivr z0.s, p0/m, z0.s, z5.s
+; CHECK-NEXT: sdivr z1.s, p0/m, z1.s, z2.s
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h
; CHECK-NEXT: uzp1 z2.h, z0.h, z0.h
-; CHECK-NEXT: splice z0.h, p0, { z1.h, z2.h }
+; CHECK-NEXT: uzp1 z3.h, z1.h, z1.h
+; CHECK-NEXT: splice z0.h, p0, { z2.h, z3.h }
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
@@ -598,33 +598,33 @@ define <8 x i16> @sdiv_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
define void @sdiv_v16i16(ptr %a, ptr %b) {
; CHECK-LABEL: sdiv_v16i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q4, q1, [x1]
+; CHECK-NEXT: ldp q16, q2, [x1]
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: ldr q0, [x0, #16]
-; CHECK-NEXT: sunpklo z2.s, z1.h
-; CHECK-NEXT: sunpklo z3.s, z0.h
-; CHECK-NEXT: sunpklo z5.s, z4.h
-; CHECK-NEXT: ext z4.b, z4.b, z0.b, #8
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT: ldr q3, [x0]
-; CHECK-NEXT: sunpklo z4.s, z4.h
-; CHECK-NEXT: sunpklo z1.s, z1.h
-; CHECK-NEXT: sunpklo z6.s, z3.h
-; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: sunpklo z3.s, z3.h
+; CHECK-NEXT: sunpklo z4.s, z2.h
+; CHECK-NEXT: sunpklo z5.s, z0.h
+; CHECK-NEXT: sunpklo z7.s, z16.h
+; CHECK-NEXT: ext z16.b, { z16.b, z17.b }, #8
+; CHECK-NEXT: ext z2.b, { z2.b, z3.b }, #8
+; CHECK-NEXT: ext z0.b, { z0.b, z1.b }, #8
+; CHECK-NEXT: sdivr z4.s, p0/m, z4.s, z5.s
+; CHECK-NEXT: ldr q5, [x0]
+; CHECK-NEXT: sunpklo z1.s, z2.h
; CHECK-NEXT: sunpklo z0.s, z0.h
-; CHECK-NEXT: sdivr z5.s, p0/m, z5.s, z6.s
-; CHECK-NEXT: sdiv z3.s, p0/m, z3.s, z4.s
-; CHECK-NEXT: uzp1 z4.h, z5.h, z5.h
+; CHECK-NEXT: sunpklo z18.s, z5.h
+; CHECK-NEXT: ext z5.b, { z5.b, z6.b }, #8
+; CHECK-NEXT: sunpklo z6.s, z16.h
+; CHECK-NEXT: sunpklo z5.s, z5.h
+; CHECK-NEXT: sdivr z7.s, p0/m, z7.s, z18.s
+; CHECK-NEXT: uzp1 z3.h, z4.h, z4.h
+; CHECK-NEXT: sdiv z5.s, p0/m, z5.s, z6.s
; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h
+; CHECK-NEXT: uzp1 z1.h, z7.h, z7.h
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z5.h, z3.h, z3.h
-; CHECK-NEXT: uzp1 z2.h, z0.h, z0.h
-; CHECK-NEXT: splice z0.h, p0, { z4.h, z5.h }
-; CHECK-NEXT: splice z1.h, p0, { z1.h, z2.h }
+; CHECK-NEXT: uzp1 z2.h, z5.h, z5.h
+; CHECK-NEXT: uzp1 z4.h, z0.h, z0.h
+; CHECK-NEXT: splice z0.h, p0, { z1.h, z2.h }
+; CHECK-NEXT: splice z1.h, p0, { z3.h, z4.h }
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
@@ -972,21 +972,21 @@ define <8 x i8> @udiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: uunpklo z3.h, z0.b
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: uunpklo z1.h, z1.b
-; CHECK-NEXT: uunpklo z0.h, z0.b
-; CHECK-NEXT: uunpklo z2.s, z1.h
-; CHECK-NEXT: uunpklo z3.s, z0.h
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT: uunpklo z5.s, z3.h
+; CHECK-NEXT: uunpklo z0.s, z1.h
+; CHECK-NEXT: ext z1.b, { z1.b, z2.b }, #8
+; CHECK-NEXT: ext z2.b, { z3.b, z4.b }, #8
; CHECK-NEXT: uunpklo z1.s, z1.h
-; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: uunpklo z2.s, z2.h
+; CHECK-NEXT: udivr z0.s, p0/m, z0.s, z5.s
+; CHECK-NEXT: udivr z1.s, p0/m, z1.s, z2.s
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h
; CHECK-NEXT: uzp1 z2.h, z0.h, z0.h
-; CHECK-NEXT: splice z0.h, p0, { z1.h, z2.h }
+; CHECK-NEXT: uzp1 z3.h, z1.h, z1.h
+; CHECK-NEXT: splice z0.h, p0, { z2.h, z3.h }
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -1038,40 +1038,40 @@ define <8 x i8> @udiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
define <16 x i8> @udiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; CHECK-LABEL: udiv_v16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: uunpklo z2.h, z1.b
-; CHECK-NEXT: uunpklo z3.h, z0.b
+; CHECK-NEXT: mov z3.d, z0.d
+; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1_z2
+; CHECK-NEXT: uunpklo z5.h, z1.b
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: uunpklo z4.s, z2.h
-; CHECK-NEXT: uunpklo z5.s, z3.h
-; CHECK-NEXT: ext z2.b, z2.b, z0.b, #8
-; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: uunpklo z1.h, z1.b
-; CHECK-NEXT: uunpklo z2.s, z2.h
-; CHECK-NEXT: uunpklo z3.s, z3.h
-; CHECK-NEXT: uunpklo z0.h, z0.b
-; CHECK-NEXT: udivr z4.s, p0/m, z4.s, z5.s
-; CHECK-NEXT: uunpklo z5.s, z0.h
-; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT: uunpklo z3.s, z1.h
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT: ext z1.b, { z1.b, z2.b }, #8
+; CHECK-NEXT: uunpklo z16.h, z3.b
+; CHECK-NEXT: ext z2.b, { z3.b, z4.b }, #8
+; CHECK-NEXT: uunpklo z0.s, z5.h
+; CHECK-NEXT: ext z5.b, { z5.b, z6.b }, #8
+; CHECK-NEXT: uunpklo z3.h, z1.b
+; CHECK-NEXT: uunpklo z1.h, z2.b
+; CHECK-NEXT: ext z6.b, { z16.b, z17.b }, #8
+; CHECK-NEXT: uunpklo z7.s, z16.h
+; CHECK-NEXT: uunpklo z5.s, z5.h
+; CHECK-NEXT: uunpklo z6.s, z6.h
+; CHECK-NEXT: udivr z0.s, p0/m, z0.s, z7.s
+; CHECK-NEXT: uunpklo z7.s, z1.h
+; CHECK-NEXT: ext z1.b, { z1.b, z2.b }, #8
; CHECK-NEXT: uunpklo z1.s, z1.h
-; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: udivr z3.s, p0/m, z3.s, z5.s
-; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
-; CHECK-NEXT: uzp1 z5.h, z2.h, z2.h
-; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z1.h, z3.h, z3.h
+; CHECK-NEXT: udivr z5.s, p0/m, z5.s, z6.s
+; CHECK-NEXT: uunpklo z6.s, z3.h
+; CHECK-NEXT: ext z3.b, { z3.b, z4.b }, #8
+; CHECK-NEXT: uunpklo z2.s, z3.h
+; CHECK-NEXT: udivr z6.s, p0/m, z6.s, z7.s
+; CHECK-NEXT: udiv z1.s, p0/m, z1.s, z2.s
; CHECK-NEXT: uzp1 z2.h, z0.h, z0.h
-; CHECK-NEXT: splice z0.h, p0, { z4.h, z5.h }
-; CHECK-NEXT: splice z1.h, p0, { z1.h, z2.h }
-; CHECK-NEXT: ptrue p0.b, vl8
+; CHECK-NEXT: ptrue p0.h, vl4
+; CHECK-NEXT: uzp1 z3.h, z5.h, z5.h
+; CHECK-NEXT: uzp1 z4.h, z6.h, z6.h
+; CHECK-NEXT: splice z0.h, p0, { z2.h, z3.h }
; CHECK-NEXT: uzp1 z2.b, z0.b, z0.b
+; CHECK-NEXT: uzp1 z5.h, z1.h, z1.h
+; CHECK-NEXT: splice z1.h, p0, { z4.h, z5.h }
+; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: uzp1 z3.b, z1.b, z1.b
; CHECK-NEXT: splice z0.b, p0, { z2.b, z3.b }
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -1155,73 +1155,73 @@ define <16 x i8> @udiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
define void @udiv_v32i8(ptr %a, ptr %b) {
; CHECK-LABEL: udiv_v32i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q6, q3, [x1]
+; CHECK-NEXT: ldp q18, q4, [x1]
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: ldr q2, [x0, #16]
-; CHECK-NEXT: uunpklo z1.h, z3.b
-; CHECK-NEXT: uunpklo z4.h, z2.b
-; CHECK-NEXT: uunpklo z7.h, z6.b
-; CHECK-NEXT: uunpklo z0.s, z1.h
-; CHECK-NEXT: uunpklo z5.s, z4.h
-; CHECK-NEXT: uunpklo z17.s, z7.h
-; CHECK-NEXT: udivr z0.s, p0/m, z0.s, z5.s
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: ext z4.b, z4.b, z0.b, #8
-; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT: ext z2.b, z2.b, z0.b, #8
-; CHECK-NEXT: ext z7.b, z7.b, z0.b, #8
-; CHECK-NEXT: ext z6.b, z6.b, z0.b, #8
+; CHECK-NEXT: uunpklo z6.h, z4.b
+; CHECK-NEXT: uunpklo z16.h, z2.b
+; CHECK-NEXT: ext z4.b, { z4.b, z5.b }, #8
+; CHECK-NEXT: ext z2.b, { z2.b, z3.b }, #8
+; CHECK-NEXT: uunpklo z20.h, z18.b
+; CHECK-NEXT: ext z18.b, { z18.b, z19.b }, #8
+; CHECK-NEXT: uunpklo z3.h, z4.b
+; CHECK-NEXT: uunpklo z0.s, z6.h
+; CHECK-NEXT: uunpklo z1.s, z16.h
+; CHECK-NEXT: udivr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ext z1.b, { z6.b, z7.b }, #8
+; CHECK-NEXT: ext z6.b, { z16.b, z17.b }, #8
+; CHECK-NEXT: ldr q16, [x0]
; CHECK-NEXT: uunpklo z1.s, z1.h
-; CHECK-NEXT: uunpklo z4.s, z4.h
-; CHECK-NEXT: uunpklo z3.h, z3.b
-; CHECK-NEXT: uunpklo z7.s, z7.h
-; CHECK-NEXT: uunpklo z6.h, z6.b
-; CHECK-NEXT: udivr z1.s, p0/m, z1.s, z4.s
-; CHECK-NEXT: uunpklo z4.h, z2.b
+; CHECK-NEXT: uunpklo z6.s, z6.h
+; CHECK-NEXT: uunpklo z22.h, z16.b
+; CHECK-NEXT: ext z16.b, { z16.b, z17.b }, #8
+; CHECK-NEXT: uunpklo z17.h, z18.b
+; CHECK-NEXT: uunpklo z24.s, z22.h
+; CHECK-NEXT: udivr z1.s, p0/m, z1.s, z6.s
+; CHECK-NEXT: uunpklo z5.h, z2.b
; CHECK-NEXT: uunpklo z2.s, z3.h
-; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT: uunpklo z5.s, z4.h
-; CHECK-NEXT: ext z4.b, z4.b, z0.b, #8
+; CHECK-NEXT: ext z3.b, { z3.b, z4.b }, #8
+; CHECK-NEXT: uunpklo z7.s, z5.h
+; CHECK-NEXT: ext z4.b, { z5.b, z6.b }, #8
; CHECK-NEXT: uunpklo z3.s, z3.h
; CHECK-NEXT: uunpklo z4.s, z4.h
-; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z5.s
-; CHECK-NEXT: ldr q5, [x0]
-; CHECK-NEXT: uunpklo z16.h, z5.b
-; CHECK-NEXT: ext z5.b, z5.b, z0.b, #8
-; CHECK-NEXT: uunpklo z5.h, z5.b
-; CHECK-NEXT: uunpklo z18.s, z16.h
-; CHECK-NEXT: ext z16.b, z16.b, z0.b, #8
-; CHECK-NEXT: uunpklo z16.s, z16.h
+; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z7.s
+; CHECK-NEXT: uunpklo z7.s, z20.h
+; CHECK-NEXT: ext z20.b, { z20.b, z21.b }, #8
+; CHECK-NEXT: ext z21.b, { z22.b, z23.b }, #8
+; CHECK-NEXT: uunpklo z20.s, z20.h
+; CHECK-NEXT: uunpklo z21.s, z21.h
+; CHECK-NEXT: udivr z20.s, p0/m, z20.s, z21.s
+; CHECK-NEXT: uunpklo z21.h, z16.b
+; CHECK-NEXT: uunpklo z16.s, z17.h
+; CHECK-NEXT: ext z17.b, { z17.b, z18.b }, #8
+; CHECK-NEXT: ext z18.b, { z21.b, z22.b }, #8
+; CHECK-NEXT: uunpklo z19.s, z21.h
+; CHECK-NEXT: uunpklo z17.s, z17.h
+; CHECK-NEXT: uunpklo z18.s, z18.h
+; CHECK-NEXT: udivr z7.s, p0/m, z7.s, z24.s
+; CHECK-NEXT: udivr z16.s, p0/m, z16.s, z19.s
+; CHECK-NEXT: uzp1 z6.h, z7.h, z7.h
+; CHECK-NEXT: uzp1 z7.h, z20.h, z20.h
; CHECK-NEXT: udivr z17.s, p0/m, z17.s, z18.s
-; CHECK-NEXT: uunpklo z18.s, z5.h
-; CHECK-NEXT: ext z5.b, z5.b, z0.b, #8
-; CHECK-NEXT: uunpklo z5.s, z5.h
-; CHECK-NEXT: udivr z7.s, p0/m, z7.s, z16.s
-; CHECK-NEXT: uunpklo z16.s, z6.h
-; CHECK-NEXT: ext z6.b, z6.b, z0.b, #8
-; CHECK-NEXT: uunpklo z6.s, z6.h
-; CHECK-NEXT: uzp1 z20.h, z17.h, z17.h
-; CHECK-NEXT: udivr z16.s, p0/m, z16.s, z18.s
-; CHECK-NEXT: uzp1 z18.h, z0.h, z0.h
-; CHECK-NEXT: uzp1 z19.h, z1.h, z1.h
-; CHECK-NEXT: uzp1 z21.h, z7.h, z7.h
-; CHECK-NEXT: udiv z5.s, p0/m, z5.s, z6.s
-; CHECK-NEXT: uzp1 z0.h, z16.h, z16.h
+; CHECK-NEXT: uzp1 z18.h, z2.h, z2.h
; CHECK-NEXT: udivr z3.s, p0/m, z3.s, z4.s
+; CHECK-NEXT: uzp1 z4.h, z0.h, z0.h
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z1.h, z5.h, z5.h
-; CHECK-NEXT: uzp1 z4.h, z2.h, z2.h
-; CHECK-NEXT: splice z2.h, p0, { z20.h, z21.h }
+; CHECK-NEXT: uzp1 z5.h, z1.h, z1.h
+; CHECK-NEXT: uzp1 z0.h, z16.h, z16.h
+; CHECK-NEXT: splice z2.h, p0, { z6.h, z7.h }
+; CHECK-NEXT: uzp1 z1.h, z17.h, z17.h
; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h }
-; CHECK-NEXT: uzp1 z5.h, z3.h, z3.h
-; CHECK-NEXT: splice z3.h, p0, { z18.h, z19.h }
-; CHECK-NEXT: splice z1.h, p0, { z4.h, z5.h }
+; CHECK-NEXT: uzp1 z19.h, z3.h, z3.h
+; CHECK-NEXT: splice z3.h, p0, { z4.h, z5.h }
; CHECK-NEXT: uzp1 z4.b, z2.b, z2.b
+; CHECK-NEXT: uzp1 z5.b, z0.b, z0.b
+; CHECK-NEXT: splice z1.h, p0, { z18.h, z19.h }
; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: uzp1 z2.b, z3.b, z3.b
-; CHECK-NEXT: uzp1 z5.b, z0.b, z0.b
-; CHECK-NEXT: uzp1 z3.b, z1.b, z1.b
; CHECK-NEXT: splice z0.b, p0, { z4.b, z5.b }
+; CHECK-NEXT: uzp1 z3.b, z1.b, z1.b
; CHECK-NEXT: splice z1.b, p0, { z2.b, z3.b }
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
@@ -1448,21 +1448,21 @@ define <4 x i16> @udiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
define <8 x i16> @udiv_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
; CHECK-LABEL: udiv_v8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: uunpklo z2.s, z1.h
-; CHECK-NEXT: uunpklo z3.s, z0.h
+; CHECK-NEXT: mov z3.d, z0.d
+; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1_z2
+; CHECK-NEXT: uunpklo z0.s, z1.h
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT: ext z1.b, { z1.b, z2.b }, #8
+; CHECK-NEXT: ext z2.b, { z3.b, z4.b }, #8
+; CHECK-NEXT: uunpklo z5.s, z3.h
; CHECK-NEXT: uunpklo z1.s, z1.h
-; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: uunpklo z2.s, z2.h
+; CHECK-NEXT: udivr z0.s, p0/m, z0.s, z5.s
+; CHECK-NEXT: udivr z1.s, p0/m, z1.s, z2.s
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h
; CHECK-NEXT: uzp1 z2.h, z0.h, z0.h
-; CHECK-NEXT: splice z0.h, p0, { z1.h, z2.h }
+; CHECK-NEXT: uzp1 z3.h, z1.h, z1.h
+; CHECK-NEXT: splice z0.h, p0, { z2.h, z3.h }
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
@@ -1512,33 +1512,33 @@ define <8 x i16> @udiv_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
define void @udiv_v16i16(ptr %a, ptr %b) {
; CHECK-LABEL: udiv_v16i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q4, q1, [x1]
+; CHECK-NEXT: ldp q16, q2, [x1]
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: ldr q0, [x0, #16]
-; CHECK-NEXT: uunpklo z2.s, z1.h
-; CHECK-NEXT: uunpklo z3.s, z0.h
-; CHECK-NEXT: uunpklo z5.s, z4.h
-; CHECK-NEXT: ext z4.b, z4.b, z0.b, #8
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT: ldr q3, [x0]
-; CHECK-NEXT: uunpklo z4.s, z4.h
-; CHECK-NEXT: uunpklo z1.s, z1.h
-; CHECK-NEXT: uunpklo z6.s, z3.h
-; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: uunpklo z3.s, z3.h
+; CHECK-NEXT: uunpklo z4.s, z2.h
+; CHECK-NEXT: uunpklo z5.s, z0.h
+; CHECK-NEXT: uunpklo z7.s, z16.h
+; CHECK-NEXT: ext z16.b, { z16.b, z17.b }, #8
+; CHECK-NEXT: ext z2.b, { z2.b, z3.b }, #8
+; CHECK-NEXT: ext z0.b, { z0.b, z1.b }, #8
+; CHECK-NEXT: udivr z4.s, p0/m, z4.s, z5.s
+; CHECK-NEXT: ldr q5, [x0]
+; CHECK-NEXT: uunpklo z1.s, z2.h
; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: udivr z5.s, p0/m, z5.s, z6.s
-; CHECK-NEXT: udiv z3.s, p0/m, z3.s, z4.s
-; CHECK-NEXT: uzp1 z4.h, z5.h, z5.h
+; CHECK-NEXT: uunpklo z18.s, z5.h
+; CHECK-NEXT: ext z5.b, { z5.b, z6.b }, #8
+; CHECK-NEXT: uunpklo z6.s, z16.h
+; CHECK-NEXT: uunpklo z5.s, z5.h
+; CHECK-NEXT: udivr z7.s, p0/m, z7.s, z18.s
+; CHECK-NEXT: uzp1 z3.h, z4.h, z4.h
+; CHECK-NEXT: udiv z5.s, p0/m, z5.s, z6.s
; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h
+; CHECK-NEXT: uzp1 z1.h, z7.h, z7.h
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z5.h, z3.h, z3.h
-; CHECK-NEXT: uzp1 z2.h, z0.h, z0.h
-; CHECK-NEXT: splice z0.h, p0, { z4.h, z5.h }
-; CHECK-NEXT: splice z1.h, p0, { z1.h, z2.h }
+; CHECK-NEXT: uzp1 z2.h, z5.h, z5.h
+; CHECK-NEXT: uzp1 z4.h, z0.h, z0.h
+; CHECK-NEXT: splice z0.h, p0, { z1.h, z2.h }
+; CHECK-NEXT: splice z1.h, p0, { z3.h, z4.h }
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
index b022c19..eb8d612 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
@@ -14,19 +14,33 @@ target triple = "aarch64-unknown-linux-gnu"
; type's element type is not byte based and thus cannot be lowered directly to
; an SVE instruction.
define void @sext_v8i1_v8i32(<8 x i1> %a, ptr %out) {
-; CHECK-LABEL: sext_v8i1_v8i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: uunpklo z0.h, z0.b
-; CHECK-NEXT: uunpklo z1.s, z0.h
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: lsl z1.s, z1.s, #31
-; CHECK-NEXT: lsl z0.s, z0.s, #31
-; CHECK-NEXT: asr z1.s, z1.s, #31
-; CHECK-NEXT: asr z0.s, z0.s, #31
-; CHECK-NEXT: stp q1, q0, [x0]
-; CHECK-NEXT: ret
+; SVE-LABEL: sext_v8i1_v8i32:
+; SVE: // %bb.0:
+; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
+; SVE-NEXT: uunpklo z0.h, z0.b
+; SVE-NEXT: uunpklo z1.s, z0.h
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: uunpklo z0.s, z0.h
+; SVE-NEXT: lsl z1.s, z1.s, #31
+; SVE-NEXT: lsl z0.s, z0.s, #31
+; SVE-NEXT: asr z1.s, z1.s, #31
+; SVE-NEXT: asr z0.s, z0.s, #31
+; SVE-NEXT: stp q1, q0, [x0]
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: sext_v8i1_v8i32:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT: uunpklo z0.h, z0.b
+; SVE2-NEXT: ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: uunpklo z0.s, z0.h
+; SVE2-NEXT: uunpklo z1.s, z2.h
+; SVE2-NEXT: lsl z0.s, z0.s, #31
+; SVE2-NEXT: lsl z1.s, z1.s, #31
+; SVE2-NEXT: asr z0.s, z0.s, #31
+; SVE2-NEXT: asr z1.s, z1.s, #31
+; SVE2-NEXT: stp q0, q1, [x0]
+; SVE2-NEXT: ret
;
; NONEON-NOSVE-LABEL: sext_v8i1_v8i32:
; NONEON-NOSVE: // %bb.0:
@@ -70,19 +84,33 @@ define void @sext_v8i1_v8i32(<8 x i1> %a, ptr %out) {
; type's element type is not power-of-2 based and thus cannot be lowered
; directly to an SVE instruction.
define void @sext_v4i3_v4i64(<4 x i3> %a, ptr %out) {
-; CHECK-LABEL: sext_v4i3_v4i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: uunpklo z1.d, z0.s
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: uunpklo z0.d, z0.s
-; CHECK-NEXT: lsl z1.d, z1.d, #61
-; CHECK-NEXT: lsl z0.d, z0.d, #61
-; CHECK-NEXT: asr z1.d, z1.d, #61
-; CHECK-NEXT: asr z0.d, z0.d, #61
-; CHECK-NEXT: stp q1, q0, [x0]
-; CHECK-NEXT: ret
+; SVE-LABEL: sext_v4i3_v4i64:
+; SVE: // %bb.0:
+; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
+; SVE-NEXT: uunpklo z0.s, z0.h
+; SVE-NEXT: uunpklo z1.d, z0.s
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: uunpklo z0.d, z0.s
+; SVE-NEXT: lsl z1.d, z1.d, #61
+; SVE-NEXT: lsl z0.d, z0.d, #61
+; SVE-NEXT: asr z1.d, z1.d, #61
+; SVE-NEXT: asr z0.d, z0.d, #61
+; SVE-NEXT: stp q1, q0, [x0]
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: sext_v4i3_v4i64:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT: uunpklo z0.s, z0.h
+; SVE2-NEXT: ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: uunpklo z0.d, z0.s
+; SVE2-NEXT: uunpklo z1.d, z2.s
+; SVE2-NEXT: lsl z0.d, z0.d, #61
+; SVE2-NEXT: lsl z1.d, z1.d, #61
+; SVE2-NEXT: asr z0.d, z0.d, #61
+; SVE2-NEXT: asr z1.d, z1.d, #61
+; SVE2-NEXT: stp q0, q1, [x0]
+; SVE2-NEXT: ret
;
; NONEON-NOSVE-LABEL: sext_v4i3_v4i64:
; NONEON-NOSVE: // %bb.0:
@@ -113,14 +141,23 @@ define void @sext_v4i3_v4i64(<4 x i3> %a, ptr %out) {
;
define void @sext_v16i8_v16i16(<16 x i8> %a, ptr %out) {
-; CHECK-LABEL: sext_v16i8_v16i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: sunpklo z1.h, z0.b
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: sunpklo z0.h, z0.b
-; CHECK-NEXT: stp q1, q0, [x0]
-; CHECK-NEXT: ret
+; SVE-LABEL: sext_v16i8_v16i16:
+; SVE: // %bb.0:
+; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; SVE-NEXT: sunpklo z1.h, z0.b
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: sunpklo z0.h, z0.b
+; SVE-NEXT: stp q1, q0, [x0]
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: sext_v16i8_v16i16:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0_z1
+; SVE2-NEXT: ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: sunpklo z0.h, z0.b
+; SVE2-NEXT: sunpklo z1.h, z2.b
+; SVE2-NEXT: stp q0, q1, [x0]
+; SVE2-NEXT: ret
;
; NONEON-NOSVE-LABEL: sext_v16i8_v16i16:
; NONEON-NOSVE: // %bb.0:
@@ -171,20 +208,35 @@ define void @sext_v16i8_v16i16(<16 x i8> %a, ptr %out) {
; NOTE: Extra 'add' is to prevent the extend being combined with the load.
define void @sext_v32i8_v32i16(ptr %in, ptr %out) {
-; CHECK-LABEL: sext_v32i8_v32i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q1, q0, [x0]
-; CHECK-NEXT: add z0.b, z0.b, z0.b
-; CHECK-NEXT: add z1.b, z1.b, z1.b
-; CHECK-NEXT: sunpklo z2.h, z0.b
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: sunpklo z3.h, z1.b
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: sunpklo z0.h, z0.b
-; CHECK-NEXT: sunpklo z1.h, z1.b
-; CHECK-NEXT: stp q2, q0, [x1, #32]
-; CHECK-NEXT: stp q3, q1, [x1]
-; CHECK-NEXT: ret
+; SVE-LABEL: sext_v32i8_v32i16:
+; SVE: // %bb.0:
+; SVE-NEXT: ldp q1, q0, [x0]
+; SVE-NEXT: add z0.b, z0.b, z0.b
+; SVE-NEXT: add z1.b, z1.b, z1.b
+; SVE-NEXT: sunpklo z2.h, z0.b
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: sunpklo z3.h, z1.b
+; SVE-NEXT: ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT: sunpklo z0.h, z0.b
+; SVE-NEXT: sunpklo z1.h, z1.b
+; SVE-NEXT: stp q2, q0, [x1, #32]
+; SVE-NEXT: stp q3, q1, [x1]
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: sext_v32i8_v32i16:
+; SVE2: // %bb.0:
+; SVE2-NEXT: ldp q1, q0, [x0]
+; SVE2-NEXT: add z2.b, z0.b, z0.b
+; SVE2-NEXT: add z0.b, z1.b, z1.b
+; SVE2-NEXT: ext z4.b, { z2.b, z3.b }, #8
+; SVE2-NEXT: ext z5.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: sunpklo z2.h, z2.b
+; SVE2-NEXT: sunpklo z0.h, z0.b
+; SVE2-NEXT: sunpklo z3.h, z4.b
+; SVE2-NEXT: sunpklo z1.h, z5.b
+; SVE2-NEXT: stp q0, q1, [x1]
+; SVE2-NEXT: stp q2, q3, [x1, #32]
+; SVE2-NEXT: ret
;
; NONEON-NOSVE-LABEL: sext_v32i8_v32i16:
; NONEON-NOSVE: // %bb.0:
@@ -365,15 +417,25 @@ define void @sext_v32i8_v32i16(ptr %in, ptr %out) {
;
define void @sext_v8i8_v8i32(<8 x i8> %a, ptr %out) {
-; CHECK-LABEL: sext_v8i8_v8i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: sunpklo z0.h, z0.b
-; CHECK-NEXT: sunpklo z1.s, z0.h
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: sunpklo z0.s, z0.h
-; CHECK-NEXT: stp q1, q0, [x0]
-; CHECK-NEXT: ret
+; SVE-LABEL: sext_v8i8_v8i32:
+; SVE: // %bb.0:
+; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
+; SVE-NEXT: sunpklo z0.h, z0.b
+; SVE-NEXT: sunpklo z1.s, z0.h
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: sunpklo z0.s, z0.h
+; SVE-NEXT: stp q1, q0, [x0]
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: sext_v8i8_v8i32:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT: sunpklo z0.h, z0.b
+; SVE2-NEXT: ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: sunpklo z0.s, z0.h
+; SVE2-NEXT: sunpklo z1.s, z2.h
+; SVE2-NEXT: stp q0, q1, [x0]
+; SVE2-NEXT: ret
;
; NONEON-NOSVE-LABEL: sext_v8i8_v8i32:
; NONEON-NOSVE: // %bb.0:
@@ -402,21 +464,37 @@ define void @sext_v8i8_v8i32(<8 x i8> %a, ptr %out) {
}
define void @sext_v16i8_v16i32(<16 x i8> %a, ptr %out) {
-; CHECK-LABEL: sext_v16i8_v16i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: sunpklo z1.h, z0.b
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: sunpklo z0.h, z0.b
-; CHECK-NEXT: sunpklo z2.s, z1.h
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: sunpklo z3.s, z0.h
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: sunpklo z1.s, z1.h
-; CHECK-NEXT: sunpklo z0.s, z0.h
-; CHECK-NEXT: stp q2, q1, [x0]
-; CHECK-NEXT: stp q3, q0, [x0, #32]
-; CHECK-NEXT: ret
+; SVE-LABEL: sext_v16i8_v16i32:
+; SVE: // %bb.0:
+; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; SVE-NEXT: sunpklo z1.h, z0.b
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: sunpklo z0.h, z0.b
+; SVE-NEXT: sunpklo z2.s, z1.h
+; SVE-NEXT: ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT: sunpklo z3.s, z0.h
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: sunpklo z1.s, z1.h
+; SVE-NEXT: sunpklo z0.s, z0.h
+; SVE-NEXT: stp q2, q1, [x0]
+; SVE-NEXT: stp q3, q0, [x0, #32]
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: sext_v16i8_v16i32:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0_z1
+; SVE2-NEXT: ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: sunpklo z0.h, z0.b
+; SVE2-NEXT: sunpklo z2.h, z2.b
+; SVE2-NEXT: ext z4.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: sunpklo z0.s, z0.h
+; SVE2-NEXT: ext z5.b, { z2.b, z3.b }, #8
+; SVE2-NEXT: sunpklo z2.s, z2.h
+; SVE2-NEXT: sunpklo z1.s, z4.h
+; SVE2-NEXT: sunpklo z3.s, z5.h
+; SVE2-NEXT: stp q0, q1, [x0]
+; SVE2-NEXT: stp q2, q3, [x0, #32]
+; SVE2-NEXT: ret
;
; NONEON-NOSVE-LABEL: sext_v16i8_v16i32:
; NONEON-NOSVE: // %bb.0:
@@ -460,34 +538,63 @@ define void @sext_v16i8_v16i32(<16 x i8> %a, ptr %out) {
}
define void @sext_v32i8_v32i32(ptr %in, ptr %out) {
-; CHECK-LABEL: sext_v32i8_v32i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q1, q0, [x0]
-; CHECK-NEXT: add z0.b, z0.b, z0.b
-; CHECK-NEXT: add z1.b, z1.b, z1.b
-; CHECK-NEXT: sunpklo z2.h, z0.b
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: sunpklo z3.h, z1.b
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: sunpklo z0.h, z0.b
-; CHECK-NEXT: sunpklo z4.s, z2.h
-; CHECK-NEXT: sunpklo z5.s, z3.h
-; CHECK-NEXT: sunpklo z1.h, z1.b
-; CHECK-NEXT: ext z2.b, z2.b, z0.b, #8
-; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT: sunpklo z6.s, z0.h
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: sunpklo z7.s, z1.h
-; CHECK-NEXT: sunpklo z2.s, z2.h
-; CHECK-NEXT: sunpklo z3.s, z3.h
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: sunpklo z0.s, z0.h
-; CHECK-NEXT: sunpklo z1.s, z1.h
-; CHECK-NEXT: stp q5, q3, [x1]
-; CHECK-NEXT: stp q4, q2, [x1, #64]
-; CHECK-NEXT: stp q6, q0, [x1, #96]
-; CHECK-NEXT: stp q7, q1, [x1, #32]
-; CHECK-NEXT: ret
+; SVE-LABEL: sext_v32i8_v32i32:
+; SVE: // %bb.0:
+; SVE-NEXT: ldp q1, q0, [x0]
+; SVE-NEXT: add z0.b, z0.b, z0.b
+; SVE-NEXT: add z1.b, z1.b, z1.b
+; SVE-NEXT: sunpklo z2.h, z0.b
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: sunpklo z3.h, z1.b
+; SVE-NEXT: ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT: sunpklo z0.h, z0.b
+; SVE-NEXT: sunpklo z4.s, z2.h
+; SVE-NEXT: sunpklo z5.s, z3.h
+; SVE-NEXT: sunpklo z1.h, z1.b
+; SVE-NEXT: ext z2.b, z2.b, z0.b, #8
+; SVE-NEXT: ext z3.b, z3.b, z0.b, #8
+; SVE-NEXT: sunpklo z6.s, z0.h
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: sunpklo z7.s, z1.h
+; SVE-NEXT: sunpklo z2.s, z2.h
+; SVE-NEXT: sunpklo z3.s, z3.h
+; SVE-NEXT: ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT: sunpklo z0.s, z0.h
+; SVE-NEXT: sunpklo z1.s, z1.h
+; SVE-NEXT: stp q5, q3, [x1]
+; SVE-NEXT: stp q4, q2, [x1, #64]
+; SVE-NEXT: stp q6, q0, [x1, #96]
+; SVE-NEXT: stp q7, q1, [x1, #32]
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: sext_v32i8_v32i32:
+; SVE2: // %bb.0:
+; SVE2-NEXT: ldp q1, q0, [x0]
+; SVE2-NEXT: add z2.b, z0.b, z0.b
+; SVE2-NEXT: add z0.b, z1.b, z1.b
+; SVE2-NEXT: ext z4.b, { z2.b, z3.b }, #8
+; SVE2-NEXT: sunpklo z2.h, z2.b
+; SVE2-NEXT: sunpklo z5.h, z0.b
+; SVE2-NEXT: ext z0.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: sunpklo z16.h, z4.b
+; SVE2-NEXT: ext z1.b, { z2.b, z3.b }, #8
+; SVE2-NEXT: sunpklo z18.h, z0.b
+; SVE2-NEXT: ext z0.b, { z5.b, z6.b }, #8
+; SVE2-NEXT: sunpklo z2.s, z2.h
+; SVE2-NEXT: sunpklo z3.s, z5.h
+; SVE2-NEXT: sunpklo z1.s, z1.h
+; SVE2-NEXT: sunpklo z0.s, z0.h
+; SVE2-NEXT: ext z4.b, { z16.b, z17.b }, #8
+; SVE2-NEXT: ext z5.b, { z18.b, z19.b }, #8
+; SVE2-NEXT: sunpklo z6.s, z16.h
+; SVE2-NEXT: stp q3, q0, [x1]
+; SVE2-NEXT: sunpklo z3.s, z18.h
+; SVE2-NEXT: stp q2, q1, [x1, #64]
+; SVE2-NEXT: sunpklo z2.s, z4.h
+; SVE2-NEXT: sunpklo z1.s, z5.h
+; SVE2-NEXT: stp q3, q1, [x1, #32]
+; SVE2-NEXT: stp q6, q2, [x1, #96]
+; SVE2-NEXT: ret
;
; NONEON-NOSVE-LABEL: sext_v32i8_v32i32:
; NONEON-NOSVE: // %bb.0:
@@ -659,18 +766,31 @@ define void @sext_v32i8_v32i32(ptr %in, ptr %out) {
; extend is a two step process where the container is any_extend'd with the
; result feeding an inreg sign extend.
define void @sext_v4i8_v4i64(<4 x i8> %a, ptr %out) {
-; CHECK-LABEL: sext_v4i8_v4i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: uunpklo z1.d, z0.s
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: uunpklo z0.d, z0.s
-; CHECK-NEXT: sxtb z1.d, p0/m, z1.d
-; CHECK-NEXT: sxtb z0.d, p0/m, z0.d
-; CHECK-NEXT: stp q1, q0, [x0]
-; CHECK-NEXT: ret
+; SVE-LABEL: sext_v4i8_v4i64:
+; SVE: // %bb.0:
+; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
+; SVE-NEXT: ptrue p0.d, vl2
+; SVE-NEXT: uunpklo z0.s, z0.h
+; SVE-NEXT: uunpklo z1.d, z0.s
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: uunpklo z0.d, z0.s
+; SVE-NEXT: sxtb z1.d, p0/m, z1.d
+; SVE-NEXT: sxtb z0.d, p0/m, z0.d
+; SVE-NEXT: stp q1, q0, [x0]
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: sext_v4i8_v4i64:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT: ptrue p0.d, vl2
+; SVE2-NEXT: uunpklo z0.s, z0.h
+; SVE2-NEXT: ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: uunpklo z0.d, z0.s
+; SVE2-NEXT: uunpklo z1.d, z2.s
+; SVE2-NEXT: sxtb z0.d, p0/m, z0.d
+; SVE2-NEXT: sxtb z1.d, p0/m, z1.d
+; SVE2-NEXT: stp q0, q1, [x0]
+; SVE2-NEXT: ret
;
; NONEON-NOSVE-LABEL: sext_v4i8_v4i64:
; NONEON-NOSVE: // %bb.0:
@@ -695,22 +815,39 @@ define void @sext_v4i8_v4i64(<4 x i8> %a, ptr %out) {
}
define void @sext_v8i8_v8i64(<8 x i8> %a, ptr %out) {
-; CHECK-LABEL: sext_v8i8_v8i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: sunpklo z0.h, z0.b
-; CHECK-NEXT: sunpklo z1.s, z0.h
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: sunpklo z0.s, z0.h
-; CHECK-NEXT: sunpklo z2.d, z1.s
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: sunpklo z3.d, z0.s
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: sunpklo z1.d, z1.s
-; CHECK-NEXT: sunpklo z0.d, z0.s
-; CHECK-NEXT: stp q2, q1, [x0]
-; CHECK-NEXT: stp q3, q0, [x0, #32]
-; CHECK-NEXT: ret
+; SVE-LABEL: sext_v8i8_v8i64:
+; SVE: // %bb.0:
+; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
+; SVE-NEXT: sunpklo z0.h, z0.b
+; SVE-NEXT: sunpklo z1.s, z0.h
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: sunpklo z0.s, z0.h
+; SVE-NEXT: sunpklo z2.d, z1.s
+; SVE-NEXT: ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT: sunpklo z3.d, z0.s
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: sunpklo z1.d, z1.s
+; SVE-NEXT: sunpklo z0.d, z0.s
+; SVE-NEXT: stp q2, q1, [x0]
+; SVE-NEXT: stp q3, q0, [x0, #32]
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: sext_v8i8_v8i64:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT: sunpklo z0.h, z0.b
+; SVE2-NEXT: ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: sunpklo z0.s, z0.h
+; SVE2-NEXT: sunpklo z2.s, z2.h
+; SVE2-NEXT: ext z4.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: sunpklo z0.d, z0.s
+; SVE2-NEXT: ext z5.b, { z2.b, z3.b }, #8
+; SVE2-NEXT: sunpklo z1.d, z4.s
+; SVE2-NEXT: sunpklo z2.d, z2.s
+; SVE2-NEXT: sunpklo z3.d, z5.s
+; SVE2-NEXT: stp q0, q1, [x0]
+; SVE2-NEXT: stp q2, q3, [x0, #32]
+; SVE2-NEXT: ret
;
; NONEON-NOSVE-LABEL: sext_v8i8_v8i64:
; NONEON-NOSVE: // %bb.0:
@@ -741,35 +878,65 @@ define void @sext_v8i8_v8i64(<8 x i8> %a, ptr %out) {
}
define void @sext_v16i8_v16i64(<16 x i8> %a, ptr %out) {
-; CHECK-LABEL: sext_v16i8_v16i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: sunpklo z1.h, z0.b
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: sunpklo z0.h, z0.b
-; CHECK-NEXT: sunpklo z2.s, z1.h
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: sunpklo z3.s, z0.h
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: sunpklo z4.d, z2.s
-; CHECK-NEXT: sunpklo z1.s, z1.h
-; CHECK-NEXT: ext z2.b, z2.b, z0.b, #8
-; CHECK-NEXT: sunpklo z0.s, z0.h
-; CHECK-NEXT: sunpklo z5.d, z3.s
-; CHECK-NEXT: sunpklo z2.d, z2.s
-; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT: sunpklo z6.d, z1.s
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: sunpklo z7.d, z0.s
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: sunpklo z3.d, z3.s
-; CHECK-NEXT: sunpklo z1.d, z1.s
-; CHECK-NEXT: stp q4, q2, [x0]
-; CHECK-NEXT: sunpklo z0.d, z0.s
-; CHECK-NEXT: stp q6, q1, [x0, #32]
-; CHECK-NEXT: stp q5, q3, [x0, #64]
-; CHECK-NEXT: stp q7, q0, [x0, #96]
-; CHECK-NEXT: ret
+; SVE-LABEL: sext_v16i8_v16i64:
+; SVE: // %bb.0:
+; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; SVE-NEXT: sunpklo z1.h, z0.b
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: sunpklo z0.h, z0.b
+; SVE-NEXT: sunpklo z2.s, z1.h
+; SVE-NEXT: ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT: sunpklo z3.s, z0.h
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: sunpklo z4.d, z2.s
+; SVE-NEXT: sunpklo z1.s, z1.h
+; SVE-NEXT: ext z2.b, z2.b, z0.b, #8
+; SVE-NEXT: sunpklo z0.s, z0.h
+; SVE-NEXT: sunpklo z5.d, z3.s
+; SVE-NEXT: sunpklo z2.d, z2.s
+; SVE-NEXT: ext z3.b, z3.b, z0.b, #8
+; SVE-NEXT: sunpklo z6.d, z1.s
+; SVE-NEXT: ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT: sunpklo z7.d, z0.s
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: sunpklo z3.d, z3.s
+; SVE-NEXT: sunpklo z1.d, z1.s
+; SVE-NEXT: stp q4, q2, [x0]
+; SVE-NEXT: sunpklo z0.d, z0.s
+; SVE-NEXT: stp q6, q1, [x0, #32]
+; SVE-NEXT: stp q5, q3, [x0, #64]
+; SVE-NEXT: stp q7, q0, [x0, #96]
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: sext_v16i8_v16i64:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0_z1
+; SVE2-NEXT: ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: sunpklo z0.h, z0.b
+; SVE2-NEXT: sunpklo z2.h, z2.b
+; SVE2-NEXT: ext z4.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: sunpklo z0.s, z0.h
+; SVE2-NEXT: sunpklo z5.s, z2.h
+; SVE2-NEXT: ext z2.b, { z2.b, z3.b }, #8
+; SVE2-NEXT: sunpklo z3.s, z4.h
+; SVE2-NEXT: ext z7.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: sunpklo z0.d, z0.s
+; SVE2-NEXT: sunpklo z1.s, z2.h
+; SVE2-NEXT: ext z16.b, { z5.b, z6.b }, #8
+; SVE2-NEXT: sunpklo z5.d, z5.s
+; SVE2-NEXT: sunpklo z7.d, z7.s
+; SVE2-NEXT: ext z6.b, { z3.b, z4.b }, #8
+; SVE2-NEXT: sunpklo z3.d, z3.s
+; SVE2-NEXT: sunpklo z16.d, z16.s
+; SVE2-NEXT: sunpklo z4.d, z6.s
+; SVE2-NEXT: stp q0, q7, [x0]
+; SVE2-NEXT: ext z0.b, { z1.b, z2.b }, #8
+; SVE2-NEXT: sunpklo z1.d, z1.s
+; SVE2-NEXT: stp q5, q16, [x0, #64]
+; SVE2-NEXT: sunpklo z0.d, z0.s
+; SVE2-NEXT: stp q3, q4, [x0, #32]
+; SVE2-NEXT: stp q1, q0, [x0, #96]
+; SVE2-NEXT: ret
;
; NONEON-NOSVE-LABEL: sext_v16i8_v16i64:
; NONEON-NOSVE: // %bb.0:
@@ -817,67 +984,125 @@ define void @sext_v16i8_v16i64(<16 x i8> %a, ptr %out) {
}
define void @sext_v32i8_v32i64(ptr %in, ptr %out) {
-; CHECK-LABEL: sext_v32i8_v32i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
-; CHECK-NEXT: add z0.b, z0.b, z0.b
-; CHECK-NEXT: add z1.b, z1.b, z1.b
-; CHECK-NEXT: mov z2.d, z0.d
-; CHECK-NEXT: sunpklo z3.h, z1.b
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: ext z2.b, z2.b, z0.b, #8
-; CHECK-NEXT: sunpklo z0.h, z0.b
-; CHECK-NEXT: sunpklo z1.h, z1.b
-; CHECK-NEXT: sunpklo z4.s, z3.h
-; CHECK-NEXT: sunpklo z2.h, z2.b
-; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT: sunpklo z5.s, z0.h
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: mov z7.d, z1.d
-; CHECK-NEXT: sunpklo z16.d, z4.s
-; CHECK-NEXT: sunpklo z1.s, z1.h
-; CHECK-NEXT: sunpklo z6.s, z2.h
-; CHECK-NEXT: ext z4.b, z4.b, z0.b, #8
-; CHECK-NEXT: ext z2.b, z2.b, z0.b, #8
-; CHECK-NEXT: ext z7.b, z7.b, z0.b, #8
-; CHECK-NEXT: sunpklo z0.s, z0.h
-; CHECK-NEXT: mov z17.d, z5.d
-; CHECK-NEXT: sunpklo z3.s, z3.h
-; CHECK-NEXT: sunpklo z5.d, z5.s
-; CHECK-NEXT: sunpklo z20.d, z1.s
-; CHECK-NEXT: sunpklo z4.d, z4.s
-; CHECK-NEXT: sunpklo z2.s, z2.h
-; CHECK-NEXT: sunpklo z7.s, z7.h
-; CHECK-NEXT: sunpklo z18.d, z6.s
-; CHECK-NEXT: ext z17.b, z17.b, z0.b, #8
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: ext z6.b, z6.b, z0.b, #8
-; CHECK-NEXT: sunpklo z19.d, z3.s
-; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT: stp q16, q4, [x1, #128]
-; CHECK-NEXT: sunpklo z16.d, z0.s
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: sunpklo z17.d, z17.s
-; CHECK-NEXT: mov z4.d, z7.d
-; CHECK-NEXT: sunpklo z1.d, z1.s
-; CHECK-NEXT: sunpklo z3.d, z3.s
-; CHECK-NEXT: sunpklo z7.d, z7.s
-; CHECK-NEXT: sunpklo z0.d, z0.s
-; CHECK-NEXT: stp q5, q17, [x1]
-; CHECK-NEXT: sunpklo z5.d, z6.s
-; CHECK-NEXT: mov z6.d, z2.d
-; CHECK-NEXT: stp q19, q3, [x1, #160]
-; CHECK-NEXT: sunpklo z2.d, z2.s
-; CHECK-NEXT: ext z4.b, z4.b, z0.b, #8
-; CHECK-NEXT: stp q16, q0, [x1, #32]
-; CHECK-NEXT: ext z6.b, z6.b, z0.b, #8
-; CHECK-NEXT: stp q20, q1, [x1, #192]
-; CHECK-NEXT: stp q18, q5, [x1, #64]
-; CHECK-NEXT: sunpklo z1.d, z4.s
-; CHECK-NEXT: sunpklo z3.d, z6.s
-; CHECK-NEXT: stp q7, q1, [x1, #224]
-; CHECK-NEXT: stp q2, q3, [x1, #96]
-; CHECK-NEXT: ret
+; SVE-LABEL: sext_v32i8_v32i64:
+; SVE: // %bb.0:
+; SVE-NEXT: ldp q0, q1, [x0]
+; SVE-NEXT: add z0.b, z0.b, z0.b
+; SVE-NEXT: add z1.b, z1.b, z1.b
+; SVE-NEXT: mov z2.d, z0.d
+; SVE-NEXT: sunpklo z3.h, z1.b
+; SVE-NEXT: ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT: ext z2.b, z2.b, z0.b, #8
+; SVE-NEXT: sunpklo z0.h, z0.b
+; SVE-NEXT: sunpklo z1.h, z1.b
+; SVE-NEXT: sunpklo z4.s, z3.h
+; SVE-NEXT: sunpklo z2.h, z2.b
+; SVE-NEXT: ext z3.b, z3.b, z0.b, #8
+; SVE-NEXT: sunpklo z5.s, z0.h
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: mov z7.d, z1.d
+; SVE-NEXT: sunpklo z16.d, z4.s
+; SVE-NEXT: sunpklo z1.s, z1.h
+; SVE-NEXT: sunpklo z6.s, z2.h
+; SVE-NEXT: ext z4.b, z4.b, z0.b, #8
+; SVE-NEXT: ext z2.b, z2.b, z0.b, #8
+; SVE-NEXT: ext z7.b, z7.b, z0.b, #8
+; SVE-NEXT: sunpklo z0.s, z0.h
+; SVE-NEXT: mov z17.d, z5.d
+; SVE-NEXT: sunpklo z3.s, z3.h
+; SVE-NEXT: sunpklo z5.d, z5.s
+; SVE-NEXT: sunpklo z20.d, z1.s
+; SVE-NEXT: sunpklo z4.d, z4.s
+; SVE-NEXT: sunpklo z2.s, z2.h
+; SVE-NEXT: sunpklo z7.s, z7.h
+; SVE-NEXT: sunpklo z18.d, z6.s
+; SVE-NEXT: ext z17.b, z17.b, z0.b, #8
+; SVE-NEXT: ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT: ext z6.b, z6.b, z0.b, #8
+; SVE-NEXT: sunpklo z19.d, z3.s
+; SVE-NEXT: ext z3.b, z3.b, z0.b, #8
+; SVE-NEXT: stp q16, q4, [x1, #128]
+; SVE-NEXT: sunpklo z16.d, z0.s
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: sunpklo z17.d, z17.s
+; SVE-NEXT: mov z4.d, z7.d
+; SVE-NEXT: sunpklo z1.d, z1.s
+; SVE-NEXT: sunpklo z3.d, z3.s
+; SVE-NEXT: sunpklo z7.d, z7.s
+; SVE-NEXT: sunpklo z0.d, z0.s
+; SVE-NEXT: stp q5, q17, [x1]
+; SVE-NEXT: sunpklo z5.d, z6.s
+; SVE-NEXT: mov z6.d, z2.d
+; SVE-NEXT: stp q19, q3, [x1, #160]
+; SVE-NEXT: sunpklo z2.d, z2.s
+; SVE-NEXT: ext z4.b, z4.b, z0.b, #8
+; SVE-NEXT: stp q16, q0, [x1, #32]
+; SVE-NEXT: ext z6.b, z6.b, z0.b, #8
+; SVE-NEXT: stp q20, q1, [x1, #192]
+; SVE-NEXT: stp q18, q5, [x1, #64]
+; SVE-NEXT: sunpklo z1.d, z4.s
+; SVE-NEXT: sunpklo z3.d, z6.s
+; SVE-NEXT: stp q7, q1, [x1, #224]
+; SVE-NEXT: stp q2, q3, [x1, #96]
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: sext_v32i8_v32i64:
+; SVE2: // %bb.0:
+; SVE2-NEXT: ldp q1, q0, [x0]
+; SVE2-NEXT: add z2.b, z0.b, z0.b
+; SVE2-NEXT: add z0.b, z1.b, z1.b
+; SVE2-NEXT: ext z4.b, { z2.b, z3.b }, #8
+; SVE2-NEXT: sunpklo z2.h, z2.b
+; SVE2-NEXT: ext z5.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: sunpklo z0.h, z0.b
+; SVE2-NEXT: sunpklo z6.h, z4.b
+; SVE2-NEXT: sunpklo z4.h, z5.b
+; SVE2-NEXT: ext z16.b, { z2.b, z3.b }, #8
+; SVE2-NEXT: sunpklo z2.s, z2.h
+; SVE2-NEXT: sunpklo z17.s, z0.h
+; SVE2-NEXT: ext z0.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: sunpklo z19.s, z6.h
+; SVE2-NEXT: sunpklo z21.s, z16.h
+; SVE2-NEXT: ext z6.b, { z6.b, z7.b }, #8
+; SVE2-NEXT: ext z7.b, { z2.b, z3.b }, #8
+; SVE2-NEXT: sunpklo z2.d, z2.s
+; SVE2-NEXT: sunpklo z23.s, z0.h
+; SVE2-NEXT: ext z0.b, { z17.b, z18.b }, #8
+; SVE2-NEXT: sunpklo z16.d, z17.s
+; SVE2-NEXT: ext z1.b, { z4.b, z5.b }, #8
+; SVE2-NEXT: sunpklo z4.s, z4.h
+; SVE2-NEXT: sunpklo z3.d, z19.s
+; SVE2-NEXT: ext z17.b, { z19.b, z20.b }, #8
+; SVE2-NEXT: sunpklo z19.s, z6.h
+; SVE2-NEXT: ext z6.b, { z21.b, z22.b }, #8
+; SVE2-NEXT: sunpklo z18.d, z21.s
+; SVE2-NEXT: sunpklo z7.d, z7.s
+; SVE2-NEXT: sunpklo z0.d, z0.s
+; SVE2-NEXT: str q16, [x1]
+; SVE2-NEXT: ext z21.b, { z4.b, z5.b }, #8
+; SVE2-NEXT: sunpklo z4.d, z4.s
+; SVE2-NEXT: sunpklo z5.d, z17.s
+; SVE2-NEXT: sunpklo z6.d, z6.s
+; SVE2-NEXT: stp q2, q7, [x1, #128]
+; SVE2-NEXT: sunpklo z2.d, z23.s
+; SVE2-NEXT: stp q3, q5, [x1, #192]
+; SVE2-NEXT: ext z3.b, { z23.b, z24.b }, #8
+; SVE2-NEXT: stp q18, q6, [x1, #160]
+; SVE2-NEXT: sunpklo z17.s, z1.h
+; SVE2-NEXT: sunpklo z1.d, z21.s
+; SVE2-NEXT: stp q0, q2, [x1, #16]
+; SVE2-NEXT: ext z2.b, { z19.b, z20.b }, #8
+; SVE2-NEXT: sunpklo z3.d, z3.s
+; SVE2-NEXT: ext z0.b, { z17.b, z18.b }, #8
+; SVE2-NEXT: stp q4, q1, [x1, #64]
+; SVE2-NEXT: sunpklo z4.d, z19.s
+; SVE2-NEXT: sunpklo z2.d, z2.s
+; SVE2-NEXT: sunpklo z5.d, z17.s
+; SVE2-NEXT: str q3, [x1, #48]
+; SVE2-NEXT: sunpklo z0.d, z0.s
+; SVE2-NEXT: stp q4, q2, [x1, #224]
+; SVE2-NEXT: stp q5, q0, [x1, #96]
+; SVE2-NEXT: ret
;
; NONEON-NOSVE-LABEL: sext_v32i8_v32i64:
; NONEON-NOSVE: // %bb.0:
@@ -1054,14 +1279,23 @@ define void @sext_v32i8_v32i64(ptr %in, ptr %out) {
;
define void @sext_v8i16_v8i32(<8 x i16> %a, ptr %out) {
-; CHECK-LABEL: sext_v8i16_v8i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: sunpklo z1.s, z0.h
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: sunpklo z0.s, z0.h
-; CHECK-NEXT: stp q1, q0, [x0]
-; CHECK-NEXT: ret
+; SVE-LABEL: sext_v8i16_v8i32:
+; SVE: // %bb.0:
+; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; SVE-NEXT: sunpklo z1.s, z0.h
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: sunpklo z0.s, z0.h
+; SVE-NEXT: stp q1, q0, [x0]
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: sext_v8i16_v8i32:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0_z1
+; SVE2-NEXT: ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: sunpklo z0.s, z0.h
+; SVE2-NEXT: sunpklo z1.s, z2.h
+; SVE2-NEXT: stp q0, q1, [x0]
+; SVE2-NEXT: ret
;
; NONEON-NOSVE-LABEL: sext_v8i16_v8i32:
; NONEON-NOSVE: // %bb.0:
@@ -1091,20 +1325,35 @@ define void @sext_v8i16_v8i32(<8 x i16> %a, ptr %out) {
}
define void @sext_v16i16_v16i32(ptr %in, ptr %out) {
-; CHECK-LABEL: sext_v16i16_v16i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q1, q0, [x0]
-; CHECK-NEXT: add z0.h, z0.h, z0.h
-; CHECK-NEXT: add z1.h, z1.h, z1.h
-; CHECK-NEXT: sunpklo z2.s, z0.h
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: sunpklo z3.s, z1.h
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: sunpklo z0.s, z0.h
-; CHECK-NEXT: sunpklo z1.s, z1.h
-; CHECK-NEXT: stp q2, q0, [x1, #32]
-; CHECK-NEXT: stp q3, q1, [x1]
-; CHECK-NEXT: ret
+; SVE-LABEL: sext_v16i16_v16i32:
+; SVE: // %bb.0:
+; SVE-NEXT: ldp q1, q0, [x0]
+; SVE-NEXT: add z0.h, z0.h, z0.h
+; SVE-NEXT: add z1.h, z1.h, z1.h
+; SVE-NEXT: sunpklo z2.s, z0.h
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: sunpklo z3.s, z1.h
+; SVE-NEXT: ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT: sunpklo z0.s, z0.h
+; SVE-NEXT: sunpklo z1.s, z1.h
+; SVE-NEXT: stp q2, q0, [x1, #32]
+; SVE-NEXT: stp q3, q1, [x1]
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: sext_v16i16_v16i32:
+; SVE2: // %bb.0:
+; SVE2-NEXT: ldp q1, q0, [x0]
+; SVE2-NEXT: add z2.h, z0.h, z0.h
+; SVE2-NEXT: add z0.h, z1.h, z1.h
+; SVE2-NEXT: ext z4.b, { z2.b, z3.b }, #8
+; SVE2-NEXT: ext z5.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: sunpklo z2.s, z2.h
+; SVE2-NEXT: sunpklo z0.s, z0.h
+; SVE2-NEXT: sunpklo z3.s, z4.h
+; SVE2-NEXT: sunpklo z1.s, z5.h
+; SVE2-NEXT: stp q0, q1, [x1]
+; SVE2-NEXT: stp q2, q3, [x1, #32]
+; SVE2-NEXT: ret
;
; NONEON-NOSVE-LABEL: sext_v16i16_v16i32:
; NONEON-NOSVE: // %bb.0:
@@ -1185,15 +1434,25 @@ define void @sext_v16i16_v16i32(ptr %in, ptr %out) {
;
define void @sext_v4i16_v4i64(<4 x i16> %a, ptr %out) {
-; CHECK-LABEL: sext_v4i16_v4i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: sunpklo z0.s, z0.h
-; CHECK-NEXT: sunpklo z1.d, z0.s
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: sunpklo z0.d, z0.s
-; CHECK-NEXT: stp q1, q0, [x0]
-; CHECK-NEXT: ret
+; SVE-LABEL: sext_v4i16_v4i64:
+; SVE: // %bb.0:
+; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
+; SVE-NEXT: sunpklo z0.s, z0.h
+; SVE-NEXT: sunpklo z1.d, z0.s
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: sunpklo z0.d, z0.s
+; SVE-NEXT: stp q1, q0, [x0]
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: sext_v4i16_v4i64:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT: sunpklo z0.s, z0.h
+; SVE2-NEXT: ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: sunpklo z0.d, z0.s
+; SVE2-NEXT: sunpklo z1.d, z2.s
+; SVE2-NEXT: stp q0, q1, [x0]
+; SVE2-NEXT: ret
;
; NONEON-NOSVE-LABEL: sext_v4i16_v4i64:
; NONEON-NOSVE: // %bb.0:
@@ -1216,21 +1475,37 @@ define void @sext_v4i16_v4i64(<4 x i16> %a, ptr %out) {
}
define void @sext_v8i16_v8i64(<8 x i16> %a, ptr %out) {
-; CHECK-LABEL: sext_v8i16_v8i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: sunpklo z1.s, z0.h
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: sunpklo z0.s, z0.h
-; CHECK-NEXT: sunpklo z2.d, z1.s
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: sunpklo z3.d, z0.s
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: sunpklo z1.d, z1.s
-; CHECK-NEXT: sunpklo z0.d, z0.s
-; CHECK-NEXT: stp q2, q1, [x0]
-; CHECK-NEXT: stp q3, q0, [x0, #32]
-; CHECK-NEXT: ret
+; SVE-LABEL: sext_v8i16_v8i64:
+; SVE: // %bb.0:
+; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; SVE-NEXT: sunpklo z1.s, z0.h
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: sunpklo z0.s, z0.h
+; SVE-NEXT: sunpklo z2.d, z1.s
+; SVE-NEXT: ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT: sunpklo z3.d, z0.s
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: sunpklo z1.d, z1.s
+; SVE-NEXT: sunpklo z0.d, z0.s
+; SVE-NEXT: stp q2, q1, [x0]
+; SVE-NEXT: stp q3, q0, [x0, #32]
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: sext_v8i16_v8i64:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0_z1
+; SVE2-NEXT: ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: sunpklo z0.s, z0.h
+; SVE2-NEXT: sunpklo z2.s, z2.h
+; SVE2-NEXT: ext z4.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: sunpklo z0.d, z0.s
+; SVE2-NEXT: ext z5.b, { z2.b, z3.b }, #8
+; SVE2-NEXT: sunpklo z2.d, z2.s
+; SVE2-NEXT: sunpklo z1.d, z4.s
+; SVE2-NEXT: sunpklo z3.d, z5.s
+; SVE2-NEXT: stp q0, q1, [x0]
+; SVE2-NEXT: stp q2, q3, [x0, #32]
+; SVE2-NEXT: ret
;
; NONEON-NOSVE-LABEL: sext_v8i16_v8i64:
; NONEON-NOSVE: // %bb.0:
@@ -1262,34 +1537,63 @@ define void @sext_v8i16_v8i64(<8 x i16> %a, ptr %out) {
}
define void @sext_v16i16_v16i64(ptr %in, ptr %out) {
-; CHECK-LABEL: sext_v16i16_v16i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q1, q0, [x0]
-; CHECK-NEXT: add z0.h, z0.h, z0.h
-; CHECK-NEXT: add z1.h, z1.h, z1.h
-; CHECK-NEXT: sunpklo z2.s, z0.h
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: sunpklo z3.s, z1.h
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: sunpklo z0.s, z0.h
-; CHECK-NEXT: sunpklo z4.d, z2.s
-; CHECK-NEXT: sunpklo z5.d, z3.s
-; CHECK-NEXT: sunpklo z1.s, z1.h
-; CHECK-NEXT: ext z2.b, z2.b, z0.b, #8
-; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT: sunpklo z6.d, z0.s
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: sunpklo z7.d, z1.s
-; CHECK-NEXT: sunpklo z2.d, z2.s
-; CHECK-NEXT: sunpklo z3.d, z3.s
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: sunpklo z0.d, z0.s
-; CHECK-NEXT: sunpklo z1.d, z1.s
-; CHECK-NEXT: stp q5, q3, [x1]
-; CHECK-NEXT: stp q4, q2, [x1, #64]
-; CHECK-NEXT: stp q6, q0, [x1, #96]
-; CHECK-NEXT: stp q7, q1, [x1, #32]
-; CHECK-NEXT: ret
+; SVE-LABEL: sext_v16i16_v16i64:
+; SVE: // %bb.0:
+; SVE-NEXT: ldp q1, q0, [x0]
+; SVE-NEXT: add z0.h, z0.h, z0.h
+; SVE-NEXT: add z1.h, z1.h, z1.h
+; SVE-NEXT: sunpklo z2.s, z0.h
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: sunpklo z3.s, z1.h
+; SVE-NEXT: ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT: sunpklo z0.s, z0.h
+; SVE-NEXT: sunpklo z4.d, z2.s
+; SVE-NEXT: sunpklo z5.d, z3.s
+; SVE-NEXT: sunpklo z1.s, z1.h
+; SVE-NEXT: ext z2.b, z2.b, z0.b, #8
+; SVE-NEXT: ext z3.b, z3.b, z0.b, #8
+; SVE-NEXT: sunpklo z6.d, z0.s
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: sunpklo z7.d, z1.s
+; SVE-NEXT: sunpklo z2.d, z2.s
+; SVE-NEXT: sunpklo z3.d, z3.s
+; SVE-NEXT: ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT: sunpklo z0.d, z0.s
+; SVE-NEXT: sunpklo z1.d, z1.s
+; SVE-NEXT: stp q5, q3, [x1]
+; SVE-NEXT: stp q4, q2, [x1, #64]
+; SVE-NEXT: stp q6, q0, [x1, #96]
+; SVE-NEXT: stp q7, q1, [x1, #32]
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: sext_v16i16_v16i64:
+; SVE2: // %bb.0:
+; SVE2-NEXT: ldp q1, q0, [x0]
+; SVE2-NEXT: add z2.h, z0.h, z0.h
+; SVE2-NEXT: add z0.h, z1.h, z1.h
+; SVE2-NEXT: ext z4.b, { z2.b, z3.b }, #8
+; SVE2-NEXT: sunpklo z2.s, z2.h
+; SVE2-NEXT: sunpklo z5.s, z0.h
+; SVE2-NEXT: ext z0.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: sunpklo z16.s, z4.h
+; SVE2-NEXT: ext z1.b, { z2.b, z3.b }, #8
+; SVE2-NEXT: sunpklo z18.s, z0.h
+; SVE2-NEXT: ext z0.b, { z5.b, z6.b }, #8
+; SVE2-NEXT: sunpklo z2.d, z2.s
+; SVE2-NEXT: sunpklo z3.d, z5.s
+; SVE2-NEXT: sunpklo z1.d, z1.s
+; SVE2-NEXT: sunpklo z0.d, z0.s
+; SVE2-NEXT: ext z4.b, { z16.b, z17.b }, #8
+; SVE2-NEXT: ext z5.b, { z18.b, z19.b }, #8
+; SVE2-NEXT: sunpklo z6.d, z16.s
+; SVE2-NEXT: stp q3, q0, [x1]
+; SVE2-NEXT: sunpklo z3.d, z18.s
+; SVE2-NEXT: stp q2, q1, [x1, #64]
+; SVE2-NEXT: sunpklo z2.d, z4.s
+; SVE2-NEXT: sunpklo z1.d, z5.s
+; SVE2-NEXT: stp q3, q1, [x1, #32]
+; SVE2-NEXT: stp q6, q2, [x1, #96]
+; SVE2-NEXT: ret
;
; NONEON-NOSVE-LABEL: sext_v16i16_v16i64:
; NONEON-NOSVE: // %bb.0:
@@ -1375,14 +1679,23 @@ define void @sext_v16i16_v16i64(ptr %in, ptr %out) {
;
define void @sext_v4i32_v4i64(<4 x i32> %a, ptr %out) {
-; CHECK-LABEL: sext_v4i32_v4i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: sunpklo z1.d, z0.s
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: sunpklo z0.d, z0.s
-; CHECK-NEXT: stp q1, q0, [x0]
-; CHECK-NEXT: ret
+; SVE-LABEL: sext_v4i32_v4i64:
+; SVE: // %bb.0:
+; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; SVE-NEXT: sunpklo z1.d, z0.s
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: sunpklo z0.d, z0.s
+; SVE-NEXT: stp q1, q0, [x0]
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: sext_v4i32_v4i64:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0_z1
+; SVE2-NEXT: ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: sunpklo z0.d, z0.s
+; SVE2-NEXT: sunpklo z1.d, z2.s
+; SVE2-NEXT: stp q0, q1, [x0]
+; SVE2-NEXT: ret
;
; NONEON-NOSVE-LABEL: sext_v4i32_v4i64:
; NONEON-NOSVE: // %bb.0:
@@ -1404,20 +1717,35 @@ define void @sext_v4i32_v4i64(<4 x i32> %a, ptr %out) {
}
define void @sext_v8i32_v8i64(ptr %in, ptr %out) {
-; CHECK-LABEL: sext_v8i32_v8i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q1, q0, [x0]
-; CHECK-NEXT: add z0.s, z0.s, z0.s
-; CHECK-NEXT: add z1.s, z1.s, z1.s
-; CHECK-NEXT: sunpklo z2.d, z0.s
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: sunpklo z3.d, z1.s
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: sunpklo z0.d, z0.s
-; CHECK-NEXT: sunpklo z1.d, z1.s
-; CHECK-NEXT: stp q2, q0, [x1, #32]
-; CHECK-NEXT: stp q3, q1, [x1]
-; CHECK-NEXT: ret
+; SVE-LABEL: sext_v8i32_v8i64:
+; SVE: // %bb.0:
+; SVE-NEXT: ldp q1, q0, [x0]
+; SVE-NEXT: add z0.s, z0.s, z0.s
+; SVE-NEXT: add z1.s, z1.s, z1.s
+; SVE-NEXT: sunpklo z2.d, z0.s
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: sunpklo z3.d, z1.s
+; SVE-NEXT: ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT: sunpklo z0.d, z0.s
+; SVE-NEXT: sunpklo z1.d, z1.s
+; SVE-NEXT: stp q2, q0, [x1, #32]
+; SVE-NEXT: stp q3, q1, [x1]
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: sext_v8i32_v8i64:
+; SVE2: // %bb.0:
+; SVE2-NEXT: ldp q1, q0, [x0]
+; SVE2-NEXT: add z2.s, z0.s, z0.s
+; SVE2-NEXT: add z0.s, z1.s, z1.s
+; SVE2-NEXT: ext z4.b, { z2.b, z3.b }, #8
+; SVE2-NEXT: ext z5.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: sunpklo z2.d, z2.s
+; SVE2-NEXT: sunpklo z0.d, z0.s
+; SVE2-NEXT: sunpklo z3.d, z4.s
+; SVE2-NEXT: sunpklo z1.d, z5.s
+; SVE2-NEXT: stp q0, q1, [x1]
+; SVE2-NEXT: stp q2, q3, [x1, #32]
+; SVE2-NEXT: ret
;
; NONEON-NOSVE-LABEL: sext_v8i32_v8i64:
; NONEON-NOSVE: // %bb.0:
@@ -1466,14 +1794,23 @@ define void @sext_v8i32_v8i64(ptr %in, ptr %out) {
;
define void @zext_v16i8_v16i16(<16 x i8> %a, ptr %out) {
-; CHECK-LABEL: zext_v16i8_v16i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: uunpklo z1.h, z0.b
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: uunpklo z0.h, z0.b
-; CHECK-NEXT: stp q1, q0, [x0]
-; CHECK-NEXT: ret
+; SVE-LABEL: zext_v16i8_v16i16:
+; SVE: // %bb.0:
+; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; SVE-NEXT: uunpklo z1.h, z0.b
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: uunpklo z0.h, z0.b
+; SVE-NEXT: stp q1, q0, [x0]
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: zext_v16i8_v16i16:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0_z1
+; SVE2-NEXT: ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: uunpklo z0.h, z0.b
+; SVE2-NEXT: uunpklo z1.h, z2.b
+; SVE2-NEXT: stp q0, q1, [x0]
+; SVE2-NEXT: ret
;
; NONEON-NOSVE-LABEL: zext_v16i8_v16i16:
; NONEON-NOSVE: // %bb.0:
@@ -1524,20 +1861,35 @@ define void @zext_v16i8_v16i16(<16 x i8> %a, ptr %out) {
; NOTE: Extra 'add' is to prevent the extend being combined with the load.
define void @zext_v32i8_v32i16(ptr %in, ptr %out) {
-; CHECK-LABEL: zext_v32i8_v32i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q1, q0, [x0]
-; CHECK-NEXT: add z0.b, z0.b, z0.b
-; CHECK-NEXT: add z1.b, z1.b, z1.b
-; CHECK-NEXT: uunpklo z2.h, z0.b
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: uunpklo z3.h, z1.b
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: uunpklo z0.h, z0.b
-; CHECK-NEXT: uunpklo z1.h, z1.b
-; CHECK-NEXT: stp q2, q0, [x1, #32]
-; CHECK-NEXT: stp q3, q1, [x1]
-; CHECK-NEXT: ret
+; SVE-LABEL: zext_v32i8_v32i16:
+; SVE: // %bb.0:
+; SVE-NEXT: ldp q1, q0, [x0]
+; SVE-NEXT: add z0.b, z0.b, z0.b
+; SVE-NEXT: add z1.b, z1.b, z1.b
+; SVE-NEXT: uunpklo z2.h, z0.b
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: uunpklo z3.h, z1.b
+; SVE-NEXT: ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT: uunpklo z0.h, z0.b
+; SVE-NEXT: uunpklo z1.h, z1.b
+; SVE-NEXT: stp q2, q0, [x1, #32]
+; SVE-NEXT: stp q3, q1, [x1]
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: zext_v32i8_v32i16:
+; SVE2: // %bb.0:
+; SVE2-NEXT: ldp q1, q0, [x0]
+; SVE2-NEXT: add z2.b, z0.b, z0.b
+; SVE2-NEXT: add z0.b, z1.b, z1.b
+; SVE2-NEXT: ext z4.b, { z2.b, z3.b }, #8
+; SVE2-NEXT: ext z5.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: uunpklo z2.h, z2.b
+; SVE2-NEXT: uunpklo z0.h, z0.b
+; SVE2-NEXT: uunpklo z3.h, z4.b
+; SVE2-NEXT: uunpklo z1.h, z5.b
+; SVE2-NEXT: stp q0, q1, [x1]
+; SVE2-NEXT: stp q2, q3, [x1, #32]
+; SVE2-NEXT: ret
;
; NONEON-NOSVE-LABEL: zext_v32i8_v32i16:
; NONEON-NOSVE: // %bb.0:
@@ -1718,15 +2070,25 @@ define void @zext_v32i8_v32i16(ptr %in, ptr %out) {
;
define void @zext_v8i8_v8i32(<8 x i8> %a, ptr %out) {
-; CHECK-LABEL: zext_v8i8_v8i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: uunpklo z0.h, z0.b
-; CHECK-NEXT: uunpklo z1.s, z0.h
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: stp q1, q0, [x0]
-; CHECK-NEXT: ret
+; SVE-LABEL: zext_v8i8_v8i32:
+; SVE: // %bb.0:
+; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
+; SVE-NEXT: uunpklo z0.h, z0.b
+; SVE-NEXT: uunpklo z1.s, z0.h
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: uunpklo z0.s, z0.h
+; SVE-NEXT: stp q1, q0, [x0]
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: zext_v8i8_v8i32:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT: uunpklo z0.h, z0.b
+; SVE2-NEXT: ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: uunpklo z0.s, z0.h
+; SVE2-NEXT: uunpklo z1.s, z2.h
+; SVE2-NEXT: stp q0, q1, [x0]
+; SVE2-NEXT: ret
;
; NONEON-NOSVE-LABEL: zext_v8i8_v8i32:
; NONEON-NOSVE: // %bb.0:
@@ -1755,21 +2117,37 @@ define void @zext_v8i8_v8i32(<8 x i8> %a, ptr %out) {
}
define void @zext_v16i8_v16i32(<16 x i8> %a, ptr %out) {
-; CHECK-LABEL: zext_v16i8_v16i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: uunpklo z1.h, z0.b
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: uunpklo z0.h, z0.b
-; CHECK-NEXT: uunpklo z2.s, z1.h
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: uunpklo z3.s, z0.h
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: uunpklo z1.s, z1.h
-; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: stp q2, q1, [x0]
-; CHECK-NEXT: stp q3, q0, [x0, #32]
-; CHECK-NEXT: ret
+; SVE-LABEL: zext_v16i8_v16i32:
+; SVE: // %bb.0:
+; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; SVE-NEXT: uunpklo z1.h, z0.b
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: uunpklo z0.h, z0.b
+; SVE-NEXT: uunpklo z2.s, z1.h
+; SVE-NEXT: ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT: uunpklo z3.s, z0.h
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: uunpklo z1.s, z1.h
+; SVE-NEXT: uunpklo z0.s, z0.h
+; SVE-NEXT: stp q2, q1, [x0]
+; SVE-NEXT: stp q3, q0, [x0, #32]
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: zext_v16i8_v16i32:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0_z1
+; SVE2-NEXT: ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: uunpklo z0.h, z0.b
+; SVE2-NEXT: uunpklo z2.h, z2.b
+; SVE2-NEXT: ext z4.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: uunpklo z0.s, z0.h
+; SVE2-NEXT: ext z5.b, { z2.b, z3.b }, #8
+; SVE2-NEXT: uunpklo z2.s, z2.h
+; SVE2-NEXT: uunpklo z1.s, z4.h
+; SVE2-NEXT: uunpklo z3.s, z5.h
+; SVE2-NEXT: stp q0, q1, [x0]
+; SVE2-NEXT: stp q2, q3, [x0, #32]
+; SVE2-NEXT: ret
;
; NONEON-NOSVE-LABEL: zext_v16i8_v16i32:
; NONEON-NOSVE: // %bb.0:
@@ -1813,34 +2191,63 @@ define void @zext_v16i8_v16i32(<16 x i8> %a, ptr %out) {
}
define void @zext_v32i8_v32i32(ptr %in, ptr %out) {
-; CHECK-LABEL: zext_v32i8_v32i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q1, q0, [x0]
-; CHECK-NEXT: add z0.b, z0.b, z0.b
-; CHECK-NEXT: add z1.b, z1.b, z1.b
-; CHECK-NEXT: uunpklo z2.h, z0.b
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: uunpklo z3.h, z1.b
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: uunpklo z0.h, z0.b
-; CHECK-NEXT: uunpklo z4.s, z2.h
-; CHECK-NEXT: uunpklo z5.s, z3.h
-; CHECK-NEXT: uunpklo z1.h, z1.b
-; CHECK-NEXT: ext z2.b, z2.b, z0.b, #8
-; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT: uunpklo z6.s, z0.h
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: uunpklo z7.s, z1.h
-; CHECK-NEXT: uunpklo z2.s, z2.h
-; CHECK-NEXT: uunpklo z3.s, z3.h
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: uunpklo z1.s, z1.h
-; CHECK-NEXT: stp q5, q3, [x1]
-; CHECK-NEXT: stp q4, q2, [x1, #64]
-; CHECK-NEXT: stp q6, q0, [x1, #96]
-; CHECK-NEXT: stp q7, q1, [x1, #32]
-; CHECK-NEXT: ret
+; SVE-LABEL: zext_v32i8_v32i32:
+; SVE: // %bb.0:
+; SVE-NEXT: ldp q1, q0, [x0]
+; SVE-NEXT: add z0.b, z0.b, z0.b
+; SVE-NEXT: add z1.b, z1.b, z1.b
+; SVE-NEXT: uunpklo z2.h, z0.b
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: uunpklo z3.h, z1.b
+; SVE-NEXT: ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT: uunpklo z0.h, z0.b
+; SVE-NEXT: uunpklo z4.s, z2.h
+; SVE-NEXT: uunpklo z5.s, z3.h
+; SVE-NEXT: uunpklo z1.h, z1.b
+; SVE-NEXT: ext z2.b, z2.b, z0.b, #8
+; SVE-NEXT: ext z3.b, z3.b, z0.b, #8
+; SVE-NEXT: uunpklo z6.s, z0.h
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: uunpklo z7.s, z1.h
+; SVE-NEXT: uunpklo z2.s, z2.h
+; SVE-NEXT: uunpklo z3.s, z3.h
+; SVE-NEXT: ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT: uunpklo z0.s, z0.h
+; SVE-NEXT: uunpklo z1.s, z1.h
+; SVE-NEXT: stp q5, q3, [x1]
+; SVE-NEXT: stp q4, q2, [x1, #64]
+; SVE-NEXT: stp q6, q0, [x1, #96]
+; SVE-NEXT: stp q7, q1, [x1, #32]
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: zext_v32i8_v32i32:
+; SVE2: // %bb.0:
+; SVE2-NEXT: ldp q1, q0, [x0]
+; SVE2-NEXT: add z2.b, z0.b, z0.b
+; SVE2-NEXT: add z0.b, z1.b, z1.b
+; SVE2-NEXT: ext z4.b, { z2.b, z3.b }, #8
+; SVE2-NEXT: uunpklo z2.h, z2.b
+; SVE2-NEXT: uunpklo z5.h, z0.b
+; SVE2-NEXT: ext z0.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: uunpklo z16.h, z4.b
+; SVE2-NEXT: ext z1.b, { z2.b, z3.b }, #8
+; SVE2-NEXT: uunpklo z18.h, z0.b
+; SVE2-NEXT: ext z0.b, { z5.b, z6.b }, #8
+; SVE2-NEXT: uunpklo z2.s, z2.h
+; SVE2-NEXT: uunpklo z3.s, z5.h
+; SVE2-NEXT: uunpklo z1.s, z1.h
+; SVE2-NEXT: uunpklo z0.s, z0.h
+; SVE2-NEXT: ext z4.b, { z16.b, z17.b }, #8
+; SVE2-NEXT: ext z5.b, { z18.b, z19.b }, #8
+; SVE2-NEXT: uunpklo z6.s, z16.h
+; SVE2-NEXT: stp q3, q0, [x1]
+; SVE2-NEXT: uunpklo z3.s, z18.h
+; SVE2-NEXT: stp q2, q1, [x1, #64]
+; SVE2-NEXT: uunpklo z2.s, z4.h
+; SVE2-NEXT: uunpklo z1.s, z5.h
+; SVE2-NEXT: stp q3, q1, [x1, #32]
+; SVE2-NEXT: stp q6, q2, [x1, #96]
+; SVE2-NEXT: ret
;
; NONEON-NOSVE-LABEL: zext_v32i8_v32i32:
; NONEON-NOSVE: // %bb.0:
@@ -2012,16 +2419,27 @@ define void @zext_v32i8_v32i32(ptr %in, ptr %out) {
; extend is a two step process where the container is zero_extend_inreg'd with
; the result feeding a normal zero extend from halfs to doublewords.
define void @zext_v4i8_v4i64(<4 x i8> %a, ptr %out) {
-; CHECK-LABEL: zext_v4i8_v4i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: and z0.h, z0.h, #0xff
-; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: uunpklo z1.d, z0.s
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: uunpklo z0.d, z0.s
-; CHECK-NEXT: stp q1, q0, [x0]
-; CHECK-NEXT: ret
+; SVE-LABEL: zext_v4i8_v4i64:
+; SVE: // %bb.0:
+; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
+; SVE-NEXT: and z0.h, z0.h, #0xff
+; SVE-NEXT: uunpklo z0.s, z0.h
+; SVE-NEXT: uunpklo z1.d, z0.s
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: uunpklo z0.d, z0.s
+; SVE-NEXT: stp q1, q0, [x0]
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: zext_v4i8_v4i64:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT: and z0.h, z0.h, #0xff
+; SVE2-NEXT: uunpklo z0.s, z0.h
+; SVE2-NEXT: ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: uunpklo z0.d, z0.s
+; SVE2-NEXT: uunpklo z1.d, z2.s
+; SVE2-NEXT: stp q0, q1, [x0]
+; SVE2-NEXT: ret
;
; NONEON-NOSVE-LABEL: zext_v4i8_v4i64:
; NONEON-NOSVE: // %bb.0:
@@ -2046,22 +2464,39 @@ define void @zext_v4i8_v4i64(<4 x i8> %a, ptr %out) {
}
define void @zext_v8i8_v8i64(<8 x i8> %a, ptr %out) {
-; CHECK-LABEL: zext_v8i8_v8i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: uunpklo z0.h, z0.b
-; CHECK-NEXT: uunpklo z1.s, z0.h
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: uunpklo z2.d, z1.s
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: uunpklo z3.d, z0.s
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: uunpklo z1.d, z1.s
-; CHECK-NEXT: uunpklo z0.d, z0.s
-; CHECK-NEXT: stp q2, q1, [x0]
-; CHECK-NEXT: stp q3, q0, [x0, #32]
-; CHECK-NEXT: ret
+; SVE-LABEL: zext_v8i8_v8i64:
+; SVE: // %bb.0:
+; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
+; SVE-NEXT: uunpklo z0.h, z0.b
+; SVE-NEXT: uunpklo z1.s, z0.h
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: uunpklo z0.s, z0.h
+; SVE-NEXT: uunpklo z2.d, z1.s
+; SVE-NEXT: ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT: uunpklo z3.d, z0.s
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: uunpklo z1.d, z1.s
+; SVE-NEXT: uunpklo z0.d, z0.s
+; SVE-NEXT: stp q2, q1, [x0]
+; SVE-NEXT: stp q3, q0, [x0, #32]
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: zext_v8i8_v8i64:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT: uunpklo z0.h, z0.b
+; SVE2-NEXT: ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: uunpklo z0.s, z0.h
+; SVE2-NEXT: uunpklo z2.s, z2.h
+; SVE2-NEXT: ext z4.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: uunpklo z0.d, z0.s
+; SVE2-NEXT: ext z5.b, { z2.b, z3.b }, #8
+; SVE2-NEXT: uunpklo z1.d, z4.s
+; SVE2-NEXT: uunpklo z2.d, z2.s
+; SVE2-NEXT: uunpklo z3.d, z5.s
+; SVE2-NEXT: stp q0, q1, [x0]
+; SVE2-NEXT: stp q2, q3, [x0, #32]
+; SVE2-NEXT: ret
;
; NONEON-NOSVE-LABEL: zext_v8i8_v8i64:
; NONEON-NOSVE: // %bb.0:
@@ -2096,35 +2531,65 @@ define void @zext_v8i8_v8i64(<8 x i8> %a, ptr %out) {
}
define void @zext_v16i8_v16i64(<16 x i8> %a, ptr %out) {
-; CHECK-LABEL: zext_v16i8_v16i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: uunpklo z1.h, z0.b
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: uunpklo z0.h, z0.b
-; CHECK-NEXT: uunpklo z2.s, z1.h
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: uunpklo z3.s, z0.h
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: uunpklo z4.d, z2.s
-; CHECK-NEXT: uunpklo z1.s, z1.h
-; CHECK-NEXT: ext z2.b, z2.b, z0.b, #8
-; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: uunpklo z5.d, z3.s
-; CHECK-NEXT: uunpklo z2.d, z2.s
-; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT: uunpklo z6.d, z1.s
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: uunpklo z7.d, z0.s
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: uunpklo z3.d, z3.s
-; CHECK-NEXT: uunpklo z1.d, z1.s
-; CHECK-NEXT: stp q4, q2, [x0]
-; CHECK-NEXT: uunpklo z0.d, z0.s
-; CHECK-NEXT: stp q6, q1, [x0, #32]
-; CHECK-NEXT: stp q5, q3, [x0, #64]
-; CHECK-NEXT: stp q7, q0, [x0, #96]
-; CHECK-NEXT: ret
+; SVE-LABEL: zext_v16i8_v16i64:
+; SVE: // %bb.0:
+; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; SVE-NEXT: uunpklo z1.h, z0.b
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: uunpklo z0.h, z0.b
+; SVE-NEXT: uunpklo z2.s, z1.h
+; SVE-NEXT: ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT: uunpklo z3.s, z0.h
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: uunpklo z4.d, z2.s
+; SVE-NEXT: uunpklo z1.s, z1.h
+; SVE-NEXT: ext z2.b, z2.b, z0.b, #8
+; SVE-NEXT: uunpklo z0.s, z0.h
+; SVE-NEXT: uunpklo z5.d, z3.s
+; SVE-NEXT: uunpklo z2.d, z2.s
+; SVE-NEXT: ext z3.b, z3.b, z0.b, #8
+; SVE-NEXT: uunpklo z6.d, z1.s
+; SVE-NEXT: ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT: uunpklo z7.d, z0.s
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: uunpklo z3.d, z3.s
+; SVE-NEXT: uunpklo z1.d, z1.s
+; SVE-NEXT: stp q4, q2, [x0]
+; SVE-NEXT: uunpklo z0.d, z0.s
+; SVE-NEXT: stp q6, q1, [x0, #32]
+; SVE-NEXT: stp q5, q3, [x0, #64]
+; SVE-NEXT: stp q7, q0, [x0, #96]
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: zext_v16i8_v16i64:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0_z1
+; SVE2-NEXT: ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: uunpklo z0.h, z0.b
+; SVE2-NEXT: uunpklo z2.h, z2.b
+; SVE2-NEXT: ext z4.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: uunpklo z0.s, z0.h
+; SVE2-NEXT: uunpklo z5.s, z2.h
+; SVE2-NEXT: ext z2.b, { z2.b, z3.b }, #8
+; SVE2-NEXT: uunpklo z3.s, z4.h
+; SVE2-NEXT: ext z7.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: uunpklo z0.d, z0.s
+; SVE2-NEXT: uunpklo z1.s, z2.h
+; SVE2-NEXT: ext z16.b, { z5.b, z6.b }, #8
+; SVE2-NEXT: uunpklo z5.d, z5.s
+; SVE2-NEXT: uunpklo z7.d, z7.s
+; SVE2-NEXT: ext z6.b, { z3.b, z4.b }, #8
+; SVE2-NEXT: uunpklo z3.d, z3.s
+; SVE2-NEXT: uunpklo z16.d, z16.s
+; SVE2-NEXT: uunpklo z4.d, z6.s
+; SVE2-NEXT: stp q0, q7, [x0]
+; SVE2-NEXT: ext z0.b, { z1.b, z2.b }, #8
+; SVE2-NEXT: uunpklo z1.d, z1.s
+; SVE2-NEXT: stp q5, q16, [x0, #64]
+; SVE2-NEXT: uunpklo z0.d, z0.s
+; SVE2-NEXT: stp q3, q4, [x0, #32]
+; SVE2-NEXT: stp q1, q0, [x0, #96]
+; SVE2-NEXT: ret
;
; NONEON-NOSVE-LABEL: zext_v16i8_v16i64:
; NONEON-NOSVE: // %bb.0:
@@ -2180,67 +2645,125 @@ define void @zext_v16i8_v16i64(<16 x i8> %a, ptr %out) {
}
define void @zext_v32i8_v32i64(ptr %in, ptr %out) {
-; CHECK-LABEL: zext_v32i8_v32i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
-; CHECK-NEXT: add z0.b, z0.b, z0.b
-; CHECK-NEXT: add z1.b, z1.b, z1.b
-; CHECK-NEXT: mov z2.d, z0.d
-; CHECK-NEXT: uunpklo z3.h, z1.b
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: ext z2.b, z2.b, z0.b, #8
-; CHECK-NEXT: uunpklo z0.h, z0.b
-; CHECK-NEXT: uunpklo z1.h, z1.b
-; CHECK-NEXT: uunpklo z4.s, z3.h
-; CHECK-NEXT: uunpklo z2.h, z2.b
-; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT: uunpklo z5.s, z0.h
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: mov z7.d, z1.d
-; CHECK-NEXT: uunpklo z16.d, z4.s
-; CHECK-NEXT: uunpklo z1.s, z1.h
-; CHECK-NEXT: uunpklo z6.s, z2.h
-; CHECK-NEXT: ext z4.b, z4.b, z0.b, #8
-; CHECK-NEXT: ext z2.b, z2.b, z0.b, #8
-; CHECK-NEXT: ext z7.b, z7.b, z0.b, #8
-; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: mov z17.d, z5.d
-; CHECK-NEXT: uunpklo z3.s, z3.h
-; CHECK-NEXT: uunpklo z5.d, z5.s
-; CHECK-NEXT: uunpklo z20.d, z1.s
-; CHECK-NEXT: uunpklo z4.d, z4.s
-; CHECK-NEXT: uunpklo z2.s, z2.h
-; CHECK-NEXT: uunpklo z7.s, z7.h
-; CHECK-NEXT: uunpklo z18.d, z6.s
-; CHECK-NEXT: ext z17.b, z17.b, z0.b, #8
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: ext z6.b, z6.b, z0.b, #8
-; CHECK-NEXT: uunpklo z19.d, z3.s
-; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT: stp q16, q4, [x1, #128]
-; CHECK-NEXT: uunpklo z16.d, z0.s
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: uunpklo z17.d, z17.s
-; CHECK-NEXT: mov z4.d, z7.d
-; CHECK-NEXT: uunpklo z1.d, z1.s
-; CHECK-NEXT: uunpklo z3.d, z3.s
-; CHECK-NEXT: uunpklo z7.d, z7.s
-; CHECK-NEXT: uunpklo z0.d, z0.s
-; CHECK-NEXT: stp q5, q17, [x1]
-; CHECK-NEXT: uunpklo z5.d, z6.s
-; CHECK-NEXT: mov z6.d, z2.d
-; CHECK-NEXT: stp q19, q3, [x1, #160]
-; CHECK-NEXT: uunpklo z2.d, z2.s
-; CHECK-NEXT: ext z4.b, z4.b, z0.b, #8
-; CHECK-NEXT: stp q16, q0, [x1, #32]
-; CHECK-NEXT: ext z6.b, z6.b, z0.b, #8
-; CHECK-NEXT: stp q20, q1, [x1, #192]
-; CHECK-NEXT: stp q18, q5, [x1, #64]
-; CHECK-NEXT: uunpklo z1.d, z4.s
-; CHECK-NEXT: uunpklo z3.d, z6.s
-; CHECK-NEXT: stp q7, q1, [x1, #224]
-; CHECK-NEXT: stp q2, q3, [x1, #96]
-; CHECK-NEXT: ret
+; SVE-LABEL: zext_v32i8_v32i64:
+; SVE: // %bb.0:
+; SVE-NEXT: ldp q0, q1, [x0]
+; SVE-NEXT: add z0.b, z0.b, z0.b
+; SVE-NEXT: add z1.b, z1.b, z1.b
+; SVE-NEXT: mov z2.d, z0.d
+; SVE-NEXT: uunpklo z3.h, z1.b
+; SVE-NEXT: ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT: ext z2.b, z2.b, z0.b, #8
+; SVE-NEXT: uunpklo z0.h, z0.b
+; SVE-NEXT: uunpklo z1.h, z1.b
+; SVE-NEXT: uunpklo z4.s, z3.h
+; SVE-NEXT: uunpklo z2.h, z2.b
+; SVE-NEXT: ext z3.b, z3.b, z0.b, #8
+; SVE-NEXT: uunpklo z5.s, z0.h
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: mov z7.d, z1.d
+; SVE-NEXT: uunpklo z16.d, z4.s
+; SVE-NEXT: uunpklo z1.s, z1.h
+; SVE-NEXT: uunpklo z6.s, z2.h
+; SVE-NEXT: ext z4.b, z4.b, z0.b, #8
+; SVE-NEXT: ext z2.b, z2.b, z0.b, #8
+; SVE-NEXT: ext z7.b, z7.b, z0.b, #8
+; SVE-NEXT: uunpklo z0.s, z0.h
+; SVE-NEXT: mov z17.d, z5.d
+; SVE-NEXT: uunpklo z3.s, z3.h
+; SVE-NEXT: uunpklo z5.d, z5.s
+; SVE-NEXT: uunpklo z20.d, z1.s
+; SVE-NEXT: uunpklo z4.d, z4.s
+; SVE-NEXT: uunpklo z2.s, z2.h
+; SVE-NEXT: uunpklo z7.s, z7.h
+; SVE-NEXT: uunpklo z18.d, z6.s
+; SVE-NEXT: ext z17.b, z17.b, z0.b, #8
+; SVE-NEXT: ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT: ext z6.b, z6.b, z0.b, #8
+; SVE-NEXT: uunpklo z19.d, z3.s
+; SVE-NEXT: ext z3.b, z3.b, z0.b, #8
+; SVE-NEXT: stp q16, q4, [x1, #128]
+; SVE-NEXT: uunpklo z16.d, z0.s
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: uunpklo z17.d, z17.s
+; SVE-NEXT: mov z4.d, z7.d
+; SVE-NEXT: uunpklo z1.d, z1.s
+; SVE-NEXT: uunpklo z3.d, z3.s
+; SVE-NEXT: uunpklo z7.d, z7.s
+; SVE-NEXT: uunpklo z0.d, z0.s
+; SVE-NEXT: stp q5, q17, [x1]
+; SVE-NEXT: uunpklo z5.d, z6.s
+; SVE-NEXT: mov z6.d, z2.d
+; SVE-NEXT: stp q19, q3, [x1, #160]
+; SVE-NEXT: uunpklo z2.d, z2.s
+; SVE-NEXT: ext z4.b, z4.b, z0.b, #8
+; SVE-NEXT: stp q16, q0, [x1, #32]
+; SVE-NEXT: ext z6.b, z6.b, z0.b, #8
+; SVE-NEXT: stp q20, q1, [x1, #192]
+; SVE-NEXT: stp q18, q5, [x1, #64]
+; SVE-NEXT: uunpklo z1.d, z4.s
+; SVE-NEXT: uunpklo z3.d, z6.s
+; SVE-NEXT: stp q7, q1, [x1, #224]
+; SVE-NEXT: stp q2, q3, [x1, #96]
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: zext_v32i8_v32i64:
+; SVE2: // %bb.0:
+; SVE2-NEXT: ldp q1, q0, [x0]
+; SVE2-NEXT: add z2.b, z0.b, z0.b
+; SVE2-NEXT: add z0.b, z1.b, z1.b
+; SVE2-NEXT: ext z4.b, { z2.b, z3.b }, #8
+; SVE2-NEXT: uunpklo z2.h, z2.b
+; SVE2-NEXT: ext z5.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: uunpklo z0.h, z0.b
+; SVE2-NEXT: uunpklo z6.h, z4.b
+; SVE2-NEXT: uunpklo z4.h, z5.b
+; SVE2-NEXT: ext z16.b, { z2.b, z3.b }, #8
+; SVE2-NEXT: uunpklo z2.s, z2.h
+; SVE2-NEXT: uunpklo z17.s, z0.h
+; SVE2-NEXT: ext z0.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: uunpklo z19.s, z6.h
+; SVE2-NEXT: uunpklo z21.s, z16.h
+; SVE2-NEXT: ext z6.b, { z6.b, z7.b }, #8
+; SVE2-NEXT: ext z7.b, { z2.b, z3.b }, #8
+; SVE2-NEXT: uunpklo z2.d, z2.s
+; SVE2-NEXT: uunpklo z23.s, z0.h
+; SVE2-NEXT: ext z0.b, { z17.b, z18.b }, #8
+; SVE2-NEXT: uunpklo z16.d, z17.s
+; SVE2-NEXT: ext z1.b, { z4.b, z5.b }, #8
+; SVE2-NEXT: uunpklo z4.s, z4.h
+; SVE2-NEXT: uunpklo z3.d, z19.s
+; SVE2-NEXT: ext z17.b, { z19.b, z20.b }, #8
+; SVE2-NEXT: uunpklo z19.s, z6.h
+; SVE2-NEXT: ext z6.b, { z21.b, z22.b }, #8
+; SVE2-NEXT: uunpklo z18.d, z21.s
+; SVE2-NEXT: uunpklo z7.d, z7.s
+; SVE2-NEXT: uunpklo z0.d, z0.s
+; SVE2-NEXT: str q16, [x1]
+; SVE2-NEXT: ext z21.b, { z4.b, z5.b }, #8
+; SVE2-NEXT: uunpklo z4.d, z4.s
+; SVE2-NEXT: uunpklo z5.d, z17.s
+; SVE2-NEXT: uunpklo z6.d, z6.s
+; SVE2-NEXT: stp q2, q7, [x1, #128]
+; SVE2-NEXT: uunpklo z2.d, z23.s
+; SVE2-NEXT: stp q3, q5, [x1, #192]
+; SVE2-NEXT: ext z3.b, { z23.b, z24.b }, #8
+; SVE2-NEXT: stp q18, q6, [x1, #160]
+; SVE2-NEXT: uunpklo z17.s, z1.h
+; SVE2-NEXT: uunpklo z1.d, z21.s
+; SVE2-NEXT: stp q0, q2, [x1, #16]
+; SVE2-NEXT: ext z2.b, { z19.b, z20.b }, #8
+; SVE2-NEXT: uunpklo z3.d, z3.s
+; SVE2-NEXT: ext z0.b, { z17.b, z18.b }, #8
+; SVE2-NEXT: stp q4, q1, [x1, #64]
+; SVE2-NEXT: uunpklo z4.d, z19.s
+; SVE2-NEXT: uunpklo z2.d, z2.s
+; SVE2-NEXT: uunpklo z5.d, z17.s
+; SVE2-NEXT: str q3, [x1, #48]
+; SVE2-NEXT: uunpklo z0.d, z0.s
+; SVE2-NEXT: stp q4, q2, [x1, #224]
+; SVE2-NEXT: stp q5, q0, [x1, #96]
+; SVE2-NEXT: ret
;
; NONEON-NOSVE-LABEL: zext_v32i8_v32i64:
; NONEON-NOSVE: // %bb.0:
@@ -2440,14 +2963,23 @@ define void @zext_v32i8_v32i64(ptr %in, ptr %out) {
;
define void @zext_v8i16_v8i32(<8 x i16> %a, ptr %out) {
-; CHECK-LABEL: zext_v8i16_v8i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: uunpklo z1.s, z0.h
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: stp q1, q0, [x0]
-; CHECK-NEXT: ret
+; SVE-LABEL: zext_v8i16_v8i32:
+; SVE: // %bb.0:
+; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; SVE-NEXT: uunpklo z1.s, z0.h
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: uunpklo z0.s, z0.h
+; SVE-NEXT: stp q1, q0, [x0]
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: zext_v8i16_v8i32:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0_z1
+; SVE2-NEXT: ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: uunpklo z0.s, z0.h
+; SVE2-NEXT: uunpklo z1.s, z2.h
+; SVE2-NEXT: stp q0, q1, [x0]
+; SVE2-NEXT: ret
;
; NONEON-NOSVE-LABEL: zext_v8i16_v8i32:
; NONEON-NOSVE: // %bb.0:
@@ -2477,20 +3009,35 @@ define void @zext_v8i16_v8i32(<8 x i16> %a, ptr %out) {
}
define void @zext_v16i16_v16i32(ptr %in, ptr %out) {
-; CHECK-LABEL: zext_v16i16_v16i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q1, q0, [x0]
-; CHECK-NEXT: add z0.h, z0.h, z0.h
-; CHECK-NEXT: add z1.h, z1.h, z1.h
-; CHECK-NEXT: uunpklo z2.s, z0.h
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: uunpklo z3.s, z1.h
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: uunpklo z1.s, z1.h
-; CHECK-NEXT: stp q2, q0, [x1, #32]
-; CHECK-NEXT: stp q3, q1, [x1]
-; CHECK-NEXT: ret
+; SVE-LABEL: zext_v16i16_v16i32:
+; SVE: // %bb.0:
+; SVE-NEXT: ldp q1, q0, [x0]
+; SVE-NEXT: add z0.h, z0.h, z0.h
+; SVE-NEXT: add z1.h, z1.h, z1.h
+; SVE-NEXT: uunpklo z2.s, z0.h
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: uunpklo z3.s, z1.h
+; SVE-NEXT: ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT: uunpklo z0.s, z0.h
+; SVE-NEXT: uunpklo z1.s, z1.h
+; SVE-NEXT: stp q2, q0, [x1, #32]
+; SVE-NEXT: stp q3, q1, [x1]
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: zext_v16i16_v16i32:
+; SVE2: // %bb.0:
+; SVE2-NEXT: ldp q1, q0, [x0]
+; SVE2-NEXT: add z2.h, z0.h, z0.h
+; SVE2-NEXT: add z0.h, z1.h, z1.h
+; SVE2-NEXT: ext z4.b, { z2.b, z3.b }, #8
+; SVE2-NEXT: ext z5.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: uunpklo z2.s, z2.h
+; SVE2-NEXT: uunpklo z0.s, z0.h
+; SVE2-NEXT: uunpklo z3.s, z4.h
+; SVE2-NEXT: uunpklo z1.s, z5.h
+; SVE2-NEXT: stp q0, q1, [x1]
+; SVE2-NEXT: stp q2, q3, [x1, #32]
+; SVE2-NEXT: ret
;
; NONEON-NOSVE-LABEL: zext_v16i16_v16i32:
; NONEON-NOSVE: // %bb.0:
@@ -2571,15 +3118,25 @@ define void @zext_v16i16_v16i32(ptr %in, ptr %out) {
;
define void @zext_v4i16_v4i64(<4 x i16> %a, ptr %out) {
-; CHECK-LABEL: zext_v4i16_v4i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: uunpklo z1.d, z0.s
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: uunpklo z0.d, z0.s
-; CHECK-NEXT: stp q1, q0, [x0]
-; CHECK-NEXT: ret
+; SVE-LABEL: zext_v4i16_v4i64:
+; SVE: // %bb.0:
+; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
+; SVE-NEXT: uunpklo z0.s, z0.h
+; SVE-NEXT: uunpklo z1.d, z0.s
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: uunpklo z0.d, z0.s
+; SVE-NEXT: stp q1, q0, [x0]
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: zext_v4i16_v4i64:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT: uunpklo z0.s, z0.h
+; SVE2-NEXT: ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: uunpklo z0.d, z0.s
+; SVE2-NEXT: uunpklo z1.d, z2.s
+; SVE2-NEXT: stp q0, q1, [x0]
+; SVE2-NEXT: ret
;
; NONEON-NOSVE-LABEL: zext_v4i16_v4i64:
; NONEON-NOSVE: // %bb.0:
@@ -2604,21 +3161,37 @@ define void @zext_v4i16_v4i64(<4 x i16> %a, ptr %out) {
}
define void @zext_v8i16_v8i64(<8 x i16> %a, ptr %out) {
-; CHECK-LABEL: zext_v8i16_v8i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: uunpklo z1.s, z0.h
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: uunpklo z2.d, z1.s
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: uunpklo z3.d, z0.s
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: uunpklo z1.d, z1.s
-; CHECK-NEXT: uunpklo z0.d, z0.s
-; CHECK-NEXT: stp q2, q1, [x0]
-; CHECK-NEXT: stp q3, q0, [x0, #32]
-; CHECK-NEXT: ret
+; SVE-LABEL: zext_v8i16_v8i64:
+; SVE: // %bb.0:
+; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; SVE-NEXT: uunpklo z1.s, z0.h
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: uunpklo z0.s, z0.h
+; SVE-NEXT: uunpklo z2.d, z1.s
+; SVE-NEXT: ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT: uunpklo z3.d, z0.s
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: uunpklo z1.d, z1.s
+; SVE-NEXT: uunpklo z0.d, z0.s
+; SVE-NEXT: stp q2, q1, [x0]
+; SVE-NEXT: stp q3, q0, [x0, #32]
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: zext_v8i16_v8i64:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0_z1
+; SVE2-NEXT: ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: uunpklo z0.s, z0.h
+; SVE2-NEXT: uunpklo z2.s, z2.h
+; SVE2-NEXT: ext z4.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: uunpklo z0.d, z0.s
+; SVE2-NEXT: ext z5.b, { z2.b, z3.b }, #8
+; SVE2-NEXT: uunpklo z2.d, z2.s
+; SVE2-NEXT: uunpklo z1.d, z4.s
+; SVE2-NEXT: uunpklo z3.d, z5.s
+; SVE2-NEXT: stp q0, q1, [x0]
+; SVE2-NEXT: stp q2, q3, [x0, #32]
+; SVE2-NEXT: ret
;
; NONEON-NOSVE-LABEL: zext_v8i16_v8i64:
; NONEON-NOSVE: // %bb.0:
@@ -2654,34 +3227,63 @@ define void @zext_v8i16_v8i64(<8 x i16> %a, ptr %out) {
}
define void @zext_v16i16_v16i64(ptr %in, ptr %out) {
-; CHECK-LABEL: zext_v16i16_v16i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q1, q0, [x0]
-; CHECK-NEXT: add z0.h, z0.h, z0.h
-; CHECK-NEXT: add z1.h, z1.h, z1.h
-; CHECK-NEXT: uunpklo z2.s, z0.h
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: uunpklo z3.s, z1.h
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: uunpklo z4.d, z2.s
-; CHECK-NEXT: uunpklo z5.d, z3.s
-; CHECK-NEXT: uunpklo z1.s, z1.h
-; CHECK-NEXT: ext z2.b, z2.b, z0.b, #8
-; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT: uunpklo z6.d, z0.s
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: uunpklo z7.d, z1.s
-; CHECK-NEXT: uunpklo z2.d, z2.s
-; CHECK-NEXT: uunpklo z3.d, z3.s
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: uunpklo z0.d, z0.s
-; CHECK-NEXT: uunpklo z1.d, z1.s
-; CHECK-NEXT: stp q5, q3, [x1]
-; CHECK-NEXT: stp q4, q2, [x1, #64]
-; CHECK-NEXT: stp q6, q0, [x1, #96]
-; CHECK-NEXT: stp q7, q1, [x1, #32]
-; CHECK-NEXT: ret
+; SVE-LABEL: zext_v16i16_v16i64:
+; SVE: // %bb.0:
+; SVE-NEXT: ldp q1, q0, [x0]
+; SVE-NEXT: add z0.h, z0.h, z0.h
+; SVE-NEXT: add z1.h, z1.h, z1.h
+; SVE-NEXT: uunpklo z2.s, z0.h
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: uunpklo z3.s, z1.h
+; SVE-NEXT: ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT: uunpklo z0.s, z0.h
+; SVE-NEXT: uunpklo z4.d, z2.s
+; SVE-NEXT: uunpklo z5.d, z3.s
+; SVE-NEXT: uunpklo z1.s, z1.h
+; SVE-NEXT: ext z2.b, z2.b, z0.b, #8
+; SVE-NEXT: ext z3.b, z3.b, z0.b, #8
+; SVE-NEXT: uunpklo z6.d, z0.s
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: uunpklo z7.d, z1.s
+; SVE-NEXT: uunpklo z2.d, z2.s
+; SVE-NEXT: uunpklo z3.d, z3.s
+; SVE-NEXT: ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT: uunpklo z0.d, z0.s
+; SVE-NEXT: uunpklo z1.d, z1.s
+; SVE-NEXT: stp q5, q3, [x1]
+; SVE-NEXT: stp q4, q2, [x1, #64]
+; SVE-NEXT: stp q6, q0, [x1, #96]
+; SVE-NEXT: stp q7, q1, [x1, #32]
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: zext_v16i16_v16i64:
+; SVE2: // %bb.0:
+; SVE2-NEXT: ldp q1, q0, [x0]
+; SVE2-NEXT: add z2.h, z0.h, z0.h
+; SVE2-NEXT: add z0.h, z1.h, z1.h
+; SVE2-NEXT: ext z4.b, { z2.b, z3.b }, #8
+; SVE2-NEXT: uunpklo z2.s, z2.h
+; SVE2-NEXT: uunpklo z5.s, z0.h
+; SVE2-NEXT: ext z0.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: uunpklo z16.s, z4.h
+; SVE2-NEXT: ext z1.b, { z2.b, z3.b }, #8
+; SVE2-NEXT: uunpklo z18.s, z0.h
+; SVE2-NEXT: ext z0.b, { z5.b, z6.b }, #8
+; SVE2-NEXT: uunpklo z2.d, z2.s
+; SVE2-NEXT: uunpklo z3.d, z5.s
+; SVE2-NEXT: uunpklo z1.d, z1.s
+; SVE2-NEXT: uunpklo z0.d, z0.s
+; SVE2-NEXT: ext z4.b, { z16.b, z17.b }, #8
+; SVE2-NEXT: ext z5.b, { z18.b, z19.b }, #8
+; SVE2-NEXT: uunpklo z6.d, z16.s
+; SVE2-NEXT: stp q3, q0, [x1]
+; SVE2-NEXT: uunpklo z3.d, z18.s
+; SVE2-NEXT: stp q2, q1, [x1, #64]
+; SVE2-NEXT: uunpklo z2.d, z4.s
+; SVE2-NEXT: uunpklo z1.d, z5.s
+; SVE2-NEXT: stp q3, q1, [x1, #32]
+; SVE2-NEXT: stp q6, q2, [x1, #96]
+; SVE2-NEXT: ret
;
; NONEON-NOSVE-LABEL: zext_v16i16_v16i64:
; NONEON-NOSVE: // %bb.0:
@@ -2777,14 +3379,23 @@ define void @zext_v16i16_v16i64(ptr %in, ptr %out) {
;
define void @zext_v4i32_v4i64(<4 x i32> %a, ptr %out) {
-; CHECK-LABEL: zext_v4i32_v4i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: uunpklo z1.d, z0.s
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: uunpklo z0.d, z0.s
-; CHECK-NEXT: stp q1, q0, [x0]
-; CHECK-NEXT: ret
+; SVE-LABEL: zext_v4i32_v4i64:
+; SVE: // %bb.0:
+; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; SVE-NEXT: uunpklo z1.d, z0.s
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: uunpklo z0.d, z0.s
+; SVE-NEXT: stp q1, q0, [x0]
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: zext_v4i32_v4i64:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0_z1
+; SVE2-NEXT: ext z2.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: uunpklo z0.d, z0.s
+; SVE2-NEXT: uunpklo z1.d, z2.s
+; SVE2-NEXT: stp q0, q1, [x0]
+; SVE2-NEXT: ret
;
; NONEON-NOSVE-LABEL: zext_v4i32_v4i64:
; NONEON-NOSVE: // %bb.0:
@@ -2808,20 +3419,35 @@ define void @zext_v4i32_v4i64(<4 x i32> %a, ptr %out) {
}
define void @zext_v8i32_v8i64(ptr %in, ptr %out) {
-; CHECK-LABEL: zext_v8i32_v8i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q1, q0, [x0]
-; CHECK-NEXT: add z0.s, z0.s, z0.s
-; CHECK-NEXT: add z1.s, z1.s, z1.s
-; CHECK-NEXT: uunpklo z2.d, z0.s
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: uunpklo z3.d, z1.s
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: uunpklo z0.d, z0.s
-; CHECK-NEXT: uunpklo z1.d, z1.s
-; CHECK-NEXT: stp q2, q0, [x1, #32]
-; CHECK-NEXT: stp q3, q1, [x1]
-; CHECK-NEXT: ret
+; SVE-LABEL: zext_v8i32_v8i64:
+; SVE: // %bb.0:
+; SVE-NEXT: ldp q1, q0, [x0]
+; SVE-NEXT: add z0.s, z0.s, z0.s
+; SVE-NEXT: add z1.s, z1.s, z1.s
+; SVE-NEXT: uunpklo z2.d, z0.s
+; SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; SVE-NEXT: uunpklo z3.d, z1.s
+; SVE-NEXT: ext z1.b, z1.b, z0.b, #8
+; SVE-NEXT: uunpklo z0.d, z0.s
+; SVE-NEXT: uunpklo z1.d, z1.s
+; SVE-NEXT: stp q2, q0, [x1, #32]
+; SVE-NEXT: stp q3, q1, [x1]
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: zext_v8i32_v8i64:
+; SVE2: // %bb.0:
+; SVE2-NEXT: ldp q1, q0, [x0]
+; SVE2-NEXT: add z2.s, z0.s, z0.s
+; SVE2-NEXT: add z0.s, z1.s, z1.s
+; SVE2-NEXT: ext z4.b, { z2.b, z3.b }, #8
+; SVE2-NEXT: ext z5.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: uunpklo z2.d, z2.s
+; SVE2-NEXT: uunpklo z0.d, z0.s
+; SVE2-NEXT: uunpklo z3.d, z4.s
+; SVE2-NEXT: uunpklo z1.d, z5.s
+; SVE2-NEXT: stp q0, q1, [x1]
+; SVE2-NEXT: stp q2, q3, [x1, #32]
+; SVE2-NEXT: ret
;
; NONEON-NOSVE-LABEL: zext_v8i32_v8i64:
; NONEON-NOSVE: // %bb.0:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
index bffef13..d880eba 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
@@ -64,18 +64,18 @@ define <8 x i8> @srem_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: sunpklo z2.h, z1.b
-; CHECK-NEXT: sunpklo z3.h, z0.b
+; CHECK-NEXT: sunpklo z4.h, z0.b
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: sunpklo z4.s, z2.h
-; CHECK-NEXT: sunpklo z5.s, z3.h
-; CHECK-NEXT: ext z2.b, z2.b, z0.b, #8
-; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
+; CHECK-NEXT: sunpklo z6.s, z2.h
+; CHECK-NEXT: ext z2.b, { z2.b, z3.b }, #8
+; CHECK-NEXT: ext z3.b, { z4.b, z5.b }, #8
+; CHECK-NEXT: sunpklo z7.s, z4.h
; CHECK-NEXT: sunpklo z2.s, z2.h
; CHECK-NEXT: sunpklo z3.s, z3.h
-; CHECK-NEXT: sdivr z4.s, p0/m, z4.s, z5.s
+; CHECK-NEXT: sdivr z6.s, p0/m, z6.s, z7.s
; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z3.s
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z3.h, z4.h, z4.h
+; CHECK-NEXT: uzp1 z3.h, z6.h, z6.h
; CHECK-NEXT: uzp1 z4.h, z2.h, z2.h
; CHECK-NEXT: splice z2.h, p0, { z3.h, z4.h }
; CHECK-NEXT: ptrue p0.b, vl8
@@ -139,46 +139,44 @@ define <8 x i8> @srem_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
define <16 x i8> @srem_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; CHECK-LABEL: srem_v16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: sunpklo z2.h, z1.b
-; CHECK-NEXT: sunpklo z3.h, z0.b
+; CHECK-NEXT: mov z3.d, z0.d
+; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1_z2
+; CHECK-NEXT: sunpklo z5.h, z1.b
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: sunpklo z4.s, z2.h
-; CHECK-NEXT: sunpklo z5.s, z3.h
-; CHECK-NEXT: ext z2.b, z2.b, z0.b, #8
-; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT: sunpklo z2.s, z2.h
-; CHECK-NEXT: sunpklo z3.s, z3.h
-; CHECK-NEXT: sdivr z4.s, p0/m, z4.s, z5.s
-; CHECK-NEXT: mov z5.d, z0.d
-; CHECK-NEXT: ext z5.b, z5.b, z0.b, #8
-; CHECK-NEXT: sunpklo z5.h, z5.b
-; CHECK-NEXT: sunpklo z7.s, z5.h
-; CHECK-NEXT: ext z5.b, z5.b, z0.b, #8
-; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT: mov z3.d, z1.d
+; CHECK-NEXT: sunpklo z16.h, z3.b
+; CHECK-NEXT: sunpklo z0.s, z5.h
+; CHECK-NEXT: ext z5.b, { z5.b, z6.b }, #8
+; CHECK-NEXT: ext z6.b, { z16.b, z17.b }, #8
+; CHECK-NEXT: sunpklo z7.s, z16.h
; CHECK-NEXT: sunpklo z5.s, z5.h
-; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT: sunpklo z3.h, z3.b
-; CHECK-NEXT: sunpklo z6.s, z3.h
-; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT: sunpklo z3.s, z3.h
-; CHECK-NEXT: sdivr z6.s, p0/m, z6.s, z7.s
-; CHECK-NEXT: sdivr z3.s, p0/m, z3.s, z5.s
-; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
+; CHECK-NEXT: sunpklo z6.s, z6.h
+; CHECK-NEXT: sdivr z0.s, p0/m, z0.s, z7.s
+; CHECK-NEXT: ext z7.b, { z3.b, z4.b }, #8
+; CHECK-NEXT: sdivr z5.s, p0/m, z5.s, z6.s
+; CHECK-NEXT: ext z6.b, { z1.b, z2.b }, #8
+; CHECK-NEXT: sunpklo z16.h, z6.b
+; CHECK-NEXT: sunpklo z6.h, z7.b
+; CHECK-NEXT: sunpklo z18.s, z16.h
+; CHECK-NEXT: sunpklo z19.s, z6.h
+; CHECK-NEXT: ext z16.b, { z16.b, z17.b }, #8
+; CHECK-NEXT: ext z6.b, { z6.b, z7.b }, #8
+; CHECK-NEXT: sunpklo z7.s, z16.h
+; CHECK-NEXT: uzp1 z16.h, z0.h, z0.h
+; CHECK-NEXT: sunpklo z6.s, z6.h
+; CHECK-NEXT: sdivr z18.s, p0/m, z18.s, z19.s
+; CHECK-NEXT: uzp1 z17.h, z5.h, z5.h
+; CHECK-NEXT: sdiv z6.s, p0/m, z6.s, z7.s
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z5.h, z2.h, z2.h
-; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h
-; CHECK-NEXT: splice z2.h, p0, { z4.h, z5.h }
-; CHECK-NEXT: uzp1 z4.b, z2.b, z2.b
-; CHECK-NEXT: uzp1 z7.h, z3.h, z3.h
-; CHECK-NEXT: splice z3.h, p0, { z6.h, z7.h }
+; CHECK-NEXT: splice z0.h, p0, { z16.h, z17.h }
+; CHECK-NEXT: uzp1 z18.h, z18.h, z18.h
+; CHECK-NEXT: uzp1 z19.h, z6.h, z6.h
+; CHECK-NEXT: uzp1 z6.b, z0.b, z0.b
+; CHECK-NEXT: splice z5.h, p0, { z18.h, z19.h }
; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: uzp1 z5.b, z3.b, z3.b
-; CHECK-NEXT: splice z2.b, p0, { z4.b, z5.b }
+; CHECK-NEXT: uzp1 z7.b, z5.b, z5.b
+; CHECK-NEXT: splice z0.b, p0, { z6.b, z7.b }
; CHECK-NEXT: ptrue p0.b, vl16
-; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b
+; CHECK-NEXT: msb z0.b, p0/m, z1.b, z3.b
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
@@ -277,84 +275,80 @@ define void @srem_v32i8(ptr %a, ptr %b) {
; CHECK-LABEL: srem_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0, #16]
-; CHECK-NEXT: ldr q1, [x1, #16]
+; CHECK-NEXT: ldr q2, [x1, #16]
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: sunpklo z3.h, z1.b
-; CHECK-NEXT: sunpklo z4.h, z0.b
-; CHECK-NEXT: sunpklo z2.s, z3.h
-; CHECK-NEXT: sunpklo z5.s, z4.h
-; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT: ext z4.b, z4.b, z0.b, #8
-; CHECK-NEXT: sunpklo z3.s, z3.h
-; CHECK-NEXT: sunpklo z4.s, z4.h
-; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z5.s
-; CHECK-NEXT: movprfx z5, z4
-; CHECK-NEXT: sdiv z5.s, p0/m, z5.s, z3.s
-; CHECK-NEXT: mov z3.d, z1.d
-; CHECK-NEXT: mov z4.d, z0.d
-; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT: ext z4.b, z4.b, z0.b, #8
-; CHECK-NEXT: sunpklo z7.h, z3.b
-; CHECK-NEXT: sunpklo z16.h, z4.b
-; CHECK-NEXT: sunpklo z3.s, z7.h
-; CHECK-NEXT: sunpklo z4.s, z16.h
-; CHECK-NEXT: ext z7.b, z7.b, z0.b, #8
-; CHECK-NEXT: ext z16.b, z16.b, z0.b, #8
-; CHECK-NEXT: sunpklo z7.s, z7.h
-; CHECK-NEXT: movprfx z6, z4
-; CHECK-NEXT: sdiv z6.s, p0/m, z6.s, z3.s
-; CHECK-NEXT: ldr q3, [x0]
-; CHECK-NEXT: ldr q4, [x1]
-; CHECK-NEXT: sunpklo z16.s, z16.h
-; CHECK-NEXT: sunpklo z17.h, z4.b
-; CHECK-NEXT: sunpklo z18.h, z3.b
-; CHECK-NEXT: sdivr z7.s, p0/m, z7.s, z16.s
-; CHECK-NEXT: sunpklo z19.s, z17.h
-; CHECK-NEXT: sunpklo z20.s, z18.h
-; CHECK-NEXT: ext z17.b, z17.b, z0.b, #8
-; CHECK-NEXT: ext z18.b, z18.b, z0.b, #8
-; CHECK-NEXT: sunpklo z17.s, z17.h
-; CHECK-NEXT: sunpklo z18.s, z18.h
-; CHECK-NEXT: sdivr z19.s, p0/m, z19.s, z20.s
-; CHECK-NEXT: mov z20.d, z3.d
-; CHECK-NEXT: ext z20.b, z20.b, z0.b, #8
-; CHECK-NEXT: sunpklo z20.h, z20.b
-; CHECK-NEXT: sunpklo z22.s, z20.h
-; CHECK-NEXT: ext z20.b, z20.b, z0.b, #8
-; CHECK-NEXT: sdivr z17.s, p0/m, z17.s, z18.s
-; CHECK-NEXT: mov z18.d, z4.d
+; CHECK-NEXT: sunpklo z5.h, z2.b
+; CHECK-NEXT: sunpklo z16.h, z0.b
+; CHECK-NEXT: sunpklo z4.s, z5.h
+; CHECK-NEXT: ext z5.b, { z5.b, z6.b }, #8
+; CHECK-NEXT: ext z6.b, { z16.b, z17.b }, #8
+; CHECK-NEXT: sunpklo z7.s, z16.h
+; CHECK-NEXT: ldr q16, [x1]
+; CHECK-NEXT: sunpklo z5.s, z5.h
+; CHECK-NEXT: sunpklo z6.s, z6.h
+; CHECK-NEXT: sunpklo z23.h, z16.b
+; CHECK-NEXT: sdivr z4.s, p0/m, z4.s, z7.s
+; CHECK-NEXT: movprfx z7, z6
+; CHECK-NEXT: sdiv z7.s, p0/m, z7.s, z5.s
+; CHECK-NEXT: ext z5.b, { z2.b, z3.b }, #8
+; CHECK-NEXT: ext z6.b, { z0.b, z1.b }, #8
+; CHECK-NEXT: sunpklo z27.s, z23.h
+; CHECK-NEXT: ext z23.b, { z23.b, z24.b }, #8
+; CHECK-NEXT: sunpklo z19.h, z5.b
+; CHECK-NEXT: sunpklo z21.h, z6.b
+; CHECK-NEXT: sunpklo z23.s, z23.h
+; CHECK-NEXT: sunpklo z5.s, z19.h
+; CHECK-NEXT: sunpklo z6.s, z21.h
+; CHECK-NEXT: ext z19.b, { z19.b, z20.b }, #8
+; CHECK-NEXT: ext z20.b, { z21.b, z22.b }, #8
+; CHECK-NEXT: sunpklo z19.s, z19.h
+; CHECK-NEXT: movprfx z18, z6
+; CHECK-NEXT: sdiv z18.s, p0/m, z18.s, z5.s
+; CHECK-NEXT: ldr q5, [x0]
; CHECK-NEXT: sunpklo z20.s, z20.h
-; CHECK-NEXT: ext z18.b, z18.b, z0.b, #8
-; CHECK-NEXT: sunpklo z18.h, z18.b
-; CHECK-NEXT: sunpklo z21.s, z18.h
-; CHECK-NEXT: ext z18.b, z18.b, z0.b, #8
-; CHECK-NEXT: sunpklo z18.s, z18.h
-; CHECK-NEXT: sdivr z21.s, p0/m, z21.s, z22.s
-; CHECK-NEXT: uzp1 z22.h, z2.h, z2.h
-; CHECK-NEXT: uzp1 z23.h, z5.h, z5.h
-; CHECK-NEXT: uzp1 z5.h, z6.h, z6.h
-; CHECK-NEXT: uzp1 z6.h, z7.h, z7.h
-; CHECK-NEXT: sdivr z18.s, p0/m, z18.s, z20.s
-; CHECK-NEXT: uzp1 z19.h, z19.h, z19.h
+; CHECK-NEXT: sunpklo z25.h, z5.b
+; CHECK-NEXT: sdivr z19.s, p0/m, z19.s, z20.s
+; CHECK-NEXT: uzp1 z20.h, z4.h, z4.h
+; CHECK-NEXT: uzp1 z21.h, z7.h, z7.h
+; CHECK-NEXT: ext z24.b, { z25.b, z26.b }, #8
+; CHECK-NEXT: sunpklo z28.s, z25.h
+; CHECK-NEXT: ext z25.b, { z5.b, z6.b }, #8
+; CHECK-NEXT: sunpklo z24.s, z24.h
+; CHECK-NEXT: sdivr z23.s, p0/m, z23.s, z24.s
+; CHECK-NEXT: ext z24.b, { z16.b, z17.b }, #8
+; CHECK-NEXT: sdivr z27.s, p0/m, z27.s, z28.s
+; CHECK-NEXT: sunpklo z28.h, z24.b
+; CHECK-NEXT: sunpklo z24.h, z25.b
+; CHECK-NEXT: sunpklo z26.s, z28.h
+; CHECK-NEXT: sunpklo z30.s, z24.h
+; CHECK-NEXT: ext z28.b, { z28.b, z29.b }, #8
+; CHECK-NEXT: ext z24.b, { z24.b, z25.b }, #8
+; CHECK-NEXT: sunpklo z25.s, z28.h
+; CHECK-NEXT: sunpklo z24.s, z24.h
+; CHECK-NEXT: sdivr z26.s, p0/m, z26.s, z30.s
+; CHECK-NEXT: uzp1 z27.h, z27.h, z27.h
+; CHECK-NEXT: uzp1 z28.h, z23.h, z23.h
+; CHECK-NEXT: sdiv z24.s, p0/m, z24.s, z25.s
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z20.h, z17.h, z17.h
-; CHECK-NEXT: splice z7.h, p0, { z22.h, z23.h }
-; CHECK-NEXT: splice z5.h, p0, { z5.h, z6.h }
-; CHECK-NEXT: uzp1 z16.h, z21.h, z21.h
-; CHECK-NEXT: splice z2.h, p0, { z19.h, z20.h }
-; CHECK-NEXT: uzp1 z6.b, z7.b, z7.b
-; CHECK-NEXT: uzp1 z7.b, z5.b, z5.b
-; CHECK-NEXT: uzp1 z17.h, z18.h, z18.h
-; CHECK-NEXT: splice z16.h, p0, { z16.h, z17.h }
-; CHECK-NEXT: uzp1 z17.b, z2.b, z2.b
+; CHECK-NEXT: splice z4.h, p0, { z27.h, z28.h }
+; CHECK-NEXT: splice z7.h, p0, { z20.h, z21.h }
+; CHECK-NEXT: uzp1 z22.h, z26.h, z26.h
+; CHECK-NEXT: uzp1 z20.b, z4.b, z4.b
+; CHECK-NEXT: uzp1 z23.h, z24.h, z24.h
+; CHECK-NEXT: uzp1 z24.h, z18.h, z18.h
+; CHECK-NEXT: uzp1 z25.h, z19.h, z19.h
+; CHECK-NEXT: splice z18.h, p0, { z22.h, z23.h }
+; CHECK-NEXT: uzp1 z22.b, z7.b, z7.b
+; CHECK-NEXT: splice z19.h, p0, { z24.h, z25.h }
; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: splice z5.b, p0, { z6.b, z7.b }
-; CHECK-NEXT: uzp1 z18.b, z16.b, z16.b
-; CHECK-NEXT: splice z2.b, p0, { z17.b, z18.b }
+; CHECK-NEXT: uzp1 z21.b, z18.b, z18.b
+; CHECK-NEXT: uzp1 z23.b, z19.b, z19.b
+; CHECK-NEXT: splice z4.b, p0, { z20.b, z21.b }
+; CHECK-NEXT: splice z7.b, p0, { z22.b, z23.b }
; CHECK-NEXT: ptrue p0.b, vl16
-; CHECK-NEXT: mls z0.b, p0/m, z5.b, z1.b
-; CHECK-NEXT: msb z2.b, p0/m, z4.b, z3.b
-; CHECK-NEXT: stp q2, q0, [x0]
+; CHECK-NEXT: msb z4.b, p0/m, z16.b, z5.b
+; CHECK-NEXT: mls z0.b, p0/m, z7.b, z2.b
+; CHECK-NEXT: stp q4, q0, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: srem_v32i8:
@@ -586,25 +580,23 @@ define <4 x i16> @srem_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
define <8 x i16> @srem_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
; CHECK-LABEL: srem_v8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: sunpklo z2.s, z1.h
-; CHECK-NEXT: sunpklo z3.s, z0.h
+; CHECK-NEXT: mov z3.d, z0.d
+; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1_z2
+; CHECK-NEXT: sunpklo z0.s, z1.h
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: mov z4.d, z0.d
-; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT: mov z3.d, z1.d
-; CHECK-NEXT: ext z4.b, z4.b, z0.b, #8
-; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT: sunpklo z4.s, z4.h
-; CHECK-NEXT: sunpklo z3.s, z3.h
-; CHECK-NEXT: sdivr z3.s, p0/m, z3.s, z4.s
+; CHECK-NEXT: sunpklo z5.s, z3.h
+; CHECK-NEXT: ext z6.b, { z3.b, z4.b }, #8
+; CHECK-NEXT: sunpklo z6.s, z6.h
+; CHECK-NEXT: sdivr z0.s, p0/m, z0.s, z5.s
+; CHECK-NEXT: ext z5.b, { z1.b, z2.b }, #8
+; CHECK-NEXT: sunpklo z5.s, z5.h
+; CHECK-NEXT: sdivr z5.s, p0/m, z5.s, z6.s
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z4.h, z2.h, z2.h
-; CHECK-NEXT: uzp1 z5.h, z3.h, z3.h
-; CHECK-NEXT: splice z2.h, p0, { z4.h, z5.h }
+; CHECK-NEXT: uzp1 z6.h, z0.h, z0.h
+; CHECK-NEXT: uzp1 z7.h, z5.h, z5.h
+; CHECK-NEXT: splice z0.h, p0, { z6.h, z7.h }
; CHECK-NEXT: ptrue p0.h, vl8
-; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h
+; CHECK-NEXT: msb z0.h, p0/m, z1.h, z3.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
@@ -662,41 +654,37 @@ define <8 x i16> @srem_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
define void @srem_v16i16(ptr %a, ptr %b) {
; CHECK-LABEL: srem_v16i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q4, q1, [x1]
+; CHECK-NEXT: ldp q16, q2, [x1]
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: ldr q0, [x0, #16]
-; CHECK-NEXT: sunpklo z2.s, z1.h
-; CHECK-NEXT: sunpklo z3.s, z0.h
-; CHECK-NEXT: sunpklo z5.s, z4.h
-; CHECK-NEXT: mov z16.d, z0.d
-; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT: ldr q3, [x0]
-; CHECK-NEXT: ext z16.b, z16.b, z0.b, #8
-; CHECK-NEXT: sunpklo z6.s, z3.h
-; CHECK-NEXT: mov z7.d, z3.d
-; CHECK-NEXT: sunpklo z16.s, z16.h
-; CHECK-NEXT: ext z7.b, z7.b, z0.b, #8
-; CHECK-NEXT: sunpklo z7.s, z7.h
-; CHECK-NEXT: sdivr z5.s, p0/m, z5.s, z6.s
-; CHECK-NEXT: mov z6.d, z4.d
-; CHECK-NEXT: ext z6.b, z6.b, z0.b, #8
-; CHECK-NEXT: sunpklo z6.s, z6.h
-; CHECK-NEXT: sdivr z6.s, p0/m, z6.s, z7.s
-; CHECK-NEXT: mov z7.d, z1.d
-; CHECK-NEXT: ext z7.b, z7.b, z0.b, #8
-; CHECK-NEXT: sunpklo z7.s, z7.h
-; CHECK-NEXT: sdivr z7.s, p0/m, z7.s, z16.s
-; CHECK-NEXT: uzp1 z16.h, z5.h, z5.h
+; CHECK-NEXT: sunpklo z4.s, z2.h
+; CHECK-NEXT: sunpklo z5.s, z0.h
+; CHECK-NEXT: sunpklo z7.s, z16.h
+; CHECK-NEXT: ext z20.b, { z0.b, z1.b }, #8
+; CHECK-NEXT: sdivr z4.s, p0/m, z4.s, z5.s
+; CHECK-NEXT: ldr q5, [x0]
+; CHECK-NEXT: sunpklo z20.s, z20.h
+; CHECK-NEXT: sunpklo z18.s, z5.h
+; CHECK-NEXT: ext z19.b, { z5.b, z6.b }, #8
+; CHECK-NEXT: sunpklo z19.s, z19.h
+; CHECK-NEXT: sdivr z7.s, p0/m, z7.s, z18.s
+; CHECK-NEXT: ext z18.b, { z16.b, z17.b }, #8
+; CHECK-NEXT: sunpklo z18.s, z18.h
+; CHECK-NEXT: uzp1 z22.h, z4.h, z4.h
+; CHECK-NEXT: sdivr z18.s, p0/m, z18.s, z19.s
+; CHECK-NEXT: ext z19.b, { z2.b, z3.b }, #8
+; CHECK-NEXT: sunpklo z19.s, z19.h
+; CHECK-NEXT: sdivr z19.s, p0/m, z19.s, z20.s
+; CHECK-NEXT: uzp1 z20.h, z7.h, z7.h
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z17.h, z6.h, z6.h
-; CHECK-NEXT: uzp1 z5.h, z2.h, z2.h
-; CHECK-NEXT: splice z2.h, p0, { z16.h, z17.h }
-; CHECK-NEXT: uzp1 z6.h, z7.h, z7.h
-; CHECK-NEXT: splice z5.h, p0, { z5.h, z6.h }
+; CHECK-NEXT: uzp1 z21.h, z18.h, z18.h
+; CHECK-NEXT: splice z4.h, p0, { z20.h, z21.h }
+; CHECK-NEXT: uzp1 z23.h, z19.h, z19.h
+; CHECK-NEXT: splice z7.h, p0, { z22.h, z23.h }
; CHECK-NEXT: ptrue p0.h, vl8
-; CHECK-NEXT: msb z2.h, p0/m, z4.h, z3.h
-; CHECK-NEXT: mls z0.h, p0/m, z5.h, z1.h
-; CHECK-NEXT: stp q2, q0, [x0]
+; CHECK-NEXT: msb z4.h, p0/m, z16.h, z5.h
+; CHECK-NEXT: mls z0.h, p0/m, z7.h, z2.h
+; CHECK-NEXT: stp q4, q0, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: srem_v16i16:
@@ -1114,18 +1102,18 @@ define <8 x i8> @urem_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: uunpklo z2.h, z1.b
-; CHECK-NEXT: uunpklo z3.h, z0.b
+; CHECK-NEXT: uunpklo z4.h, z0.b
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: uunpklo z4.s, z2.h
-; CHECK-NEXT: uunpklo z5.s, z3.h
-; CHECK-NEXT: ext z2.b, z2.b, z0.b, #8
-; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
+; CHECK-NEXT: uunpklo z6.s, z2.h
+; CHECK-NEXT: ext z2.b, { z2.b, z3.b }, #8
+; CHECK-NEXT: ext z3.b, { z4.b, z5.b }, #8
+; CHECK-NEXT: uunpklo z7.s, z4.h
; CHECK-NEXT: uunpklo z2.s, z2.h
; CHECK-NEXT: uunpklo z3.s, z3.h
-; CHECK-NEXT: udivr z4.s, p0/m, z4.s, z5.s
+; CHECK-NEXT: udivr z6.s, p0/m, z6.s, z7.s
; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z3.s
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z3.h, z4.h, z4.h
+; CHECK-NEXT: uzp1 z3.h, z6.h, z6.h
; CHECK-NEXT: uzp1 z4.h, z2.h, z2.h
; CHECK-NEXT: splice z2.h, p0, { z3.h, z4.h }
; CHECK-NEXT: ptrue p0.b, vl8
@@ -1189,46 +1177,44 @@ define <8 x i8> @urem_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
define <16 x i8> @urem_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; CHECK-LABEL: urem_v16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: uunpklo z2.h, z1.b
-; CHECK-NEXT: uunpklo z3.h, z0.b
+; CHECK-NEXT: mov z3.d, z0.d
+; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1_z2
+; CHECK-NEXT: uunpklo z5.h, z1.b
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: uunpklo z4.s, z2.h
-; CHECK-NEXT: uunpklo z5.s, z3.h
-; CHECK-NEXT: ext z2.b, z2.b, z0.b, #8
-; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT: uunpklo z2.s, z2.h
-; CHECK-NEXT: uunpklo z3.s, z3.h
-; CHECK-NEXT: udivr z4.s, p0/m, z4.s, z5.s
-; CHECK-NEXT: mov z5.d, z0.d
-; CHECK-NEXT: ext z5.b, z5.b, z0.b, #8
-; CHECK-NEXT: uunpklo z5.h, z5.b
-; CHECK-NEXT: uunpklo z7.s, z5.h
-; CHECK-NEXT: ext z5.b, z5.b, z0.b, #8
-; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT: mov z3.d, z1.d
+; CHECK-NEXT: uunpklo z16.h, z3.b
+; CHECK-NEXT: uunpklo z0.s, z5.h
+; CHECK-NEXT: ext z5.b, { z5.b, z6.b }, #8
+; CHECK-NEXT: ext z6.b, { z16.b, z17.b }, #8
+; CHECK-NEXT: uunpklo z7.s, z16.h
; CHECK-NEXT: uunpklo z5.s, z5.h
-; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT: uunpklo z3.h, z3.b
-; CHECK-NEXT: uunpklo z6.s, z3.h
-; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT: uunpklo z3.s, z3.h
-; CHECK-NEXT: udivr z6.s, p0/m, z6.s, z7.s
-; CHECK-NEXT: udivr z3.s, p0/m, z3.s, z5.s
-; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
+; CHECK-NEXT: uunpklo z6.s, z6.h
+; CHECK-NEXT: udivr z0.s, p0/m, z0.s, z7.s
+; CHECK-NEXT: ext z7.b, { z3.b, z4.b }, #8
+; CHECK-NEXT: udivr z5.s, p0/m, z5.s, z6.s
+; CHECK-NEXT: ext z6.b, { z1.b, z2.b }, #8
+; CHECK-NEXT: uunpklo z16.h, z6.b
+; CHECK-NEXT: uunpklo z6.h, z7.b
+; CHECK-NEXT: uunpklo z18.s, z16.h
+; CHECK-NEXT: uunpklo z19.s, z6.h
+; CHECK-NEXT: ext z16.b, { z16.b, z17.b }, #8
+; CHECK-NEXT: ext z6.b, { z6.b, z7.b }, #8
+; CHECK-NEXT: uunpklo z7.s, z16.h
+; CHECK-NEXT: uzp1 z16.h, z0.h, z0.h
+; CHECK-NEXT: uunpklo z6.s, z6.h
+; CHECK-NEXT: udivr z18.s, p0/m, z18.s, z19.s
+; CHECK-NEXT: uzp1 z17.h, z5.h, z5.h
+; CHECK-NEXT: udiv z6.s, p0/m, z6.s, z7.s
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z5.h, z2.h, z2.h
-; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h
-; CHECK-NEXT: splice z2.h, p0, { z4.h, z5.h }
-; CHECK-NEXT: uzp1 z4.b, z2.b, z2.b
-; CHECK-NEXT: uzp1 z7.h, z3.h, z3.h
-; CHECK-NEXT: splice z3.h, p0, { z6.h, z7.h }
+; CHECK-NEXT: splice z0.h, p0, { z16.h, z17.h }
+; CHECK-NEXT: uzp1 z18.h, z18.h, z18.h
+; CHECK-NEXT: uzp1 z19.h, z6.h, z6.h
+; CHECK-NEXT: uzp1 z6.b, z0.b, z0.b
+; CHECK-NEXT: splice z5.h, p0, { z18.h, z19.h }
; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: uzp1 z5.b, z3.b, z3.b
-; CHECK-NEXT: splice z2.b, p0, { z4.b, z5.b }
+; CHECK-NEXT: uzp1 z7.b, z5.b, z5.b
+; CHECK-NEXT: splice z0.b, p0, { z6.b, z7.b }
; CHECK-NEXT: ptrue p0.b, vl16
-; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b
+; CHECK-NEXT: msb z0.b, p0/m, z1.b, z3.b
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
@@ -1327,84 +1313,80 @@ define void @urem_v32i8(ptr %a, ptr %b) {
; CHECK-LABEL: urem_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0, #16]
-; CHECK-NEXT: ldr q1, [x1, #16]
+; CHECK-NEXT: ldr q2, [x1, #16]
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: uunpklo z3.h, z1.b
-; CHECK-NEXT: uunpklo z4.h, z0.b
-; CHECK-NEXT: uunpklo z2.s, z3.h
-; CHECK-NEXT: uunpklo z5.s, z4.h
-; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT: ext z4.b, z4.b, z0.b, #8
-; CHECK-NEXT: uunpklo z3.s, z3.h
-; CHECK-NEXT: uunpklo z4.s, z4.h
-; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z5.s
-; CHECK-NEXT: movprfx z5, z4
-; CHECK-NEXT: udiv z5.s, p0/m, z5.s, z3.s
-; CHECK-NEXT: mov z3.d, z1.d
-; CHECK-NEXT: mov z4.d, z0.d
-; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT: ext z4.b, z4.b, z0.b, #8
-; CHECK-NEXT: uunpklo z7.h, z3.b
-; CHECK-NEXT: uunpklo z16.h, z4.b
-; CHECK-NEXT: uunpklo z3.s, z7.h
-; CHECK-NEXT: uunpklo z4.s, z16.h
-; CHECK-NEXT: ext z7.b, z7.b, z0.b, #8
-; CHECK-NEXT: ext z16.b, z16.b, z0.b, #8
-; CHECK-NEXT: uunpklo z7.s, z7.h
-; CHECK-NEXT: movprfx z6, z4
-; CHECK-NEXT: udiv z6.s, p0/m, z6.s, z3.s
-; CHECK-NEXT: ldr q3, [x0]
-; CHECK-NEXT: ldr q4, [x1]
-; CHECK-NEXT: uunpklo z16.s, z16.h
-; CHECK-NEXT: uunpklo z17.h, z4.b
-; CHECK-NEXT: uunpklo z18.h, z3.b
-; CHECK-NEXT: udivr z7.s, p0/m, z7.s, z16.s
-; CHECK-NEXT: uunpklo z19.s, z17.h
-; CHECK-NEXT: uunpklo z20.s, z18.h
-; CHECK-NEXT: ext z17.b, z17.b, z0.b, #8
-; CHECK-NEXT: ext z18.b, z18.b, z0.b, #8
-; CHECK-NEXT: uunpklo z17.s, z17.h
-; CHECK-NEXT: uunpklo z18.s, z18.h
-; CHECK-NEXT: udivr z19.s, p0/m, z19.s, z20.s
-; CHECK-NEXT: mov z20.d, z3.d
-; CHECK-NEXT: ext z20.b, z20.b, z0.b, #8
-; CHECK-NEXT: uunpklo z20.h, z20.b
-; CHECK-NEXT: uunpklo z22.s, z20.h
-; CHECK-NEXT: ext z20.b, z20.b, z0.b, #8
-; CHECK-NEXT: udivr z17.s, p0/m, z17.s, z18.s
-; CHECK-NEXT: mov z18.d, z4.d
+; CHECK-NEXT: uunpklo z5.h, z2.b
+; CHECK-NEXT: uunpklo z16.h, z0.b
+; CHECK-NEXT: uunpklo z4.s, z5.h
+; CHECK-NEXT: ext z5.b, { z5.b, z6.b }, #8
+; CHECK-NEXT: ext z6.b, { z16.b, z17.b }, #8
+; CHECK-NEXT: uunpklo z7.s, z16.h
+; CHECK-NEXT: ldr q16, [x1]
+; CHECK-NEXT: uunpklo z5.s, z5.h
+; CHECK-NEXT: uunpklo z6.s, z6.h
+; CHECK-NEXT: uunpklo z23.h, z16.b
+; CHECK-NEXT: udivr z4.s, p0/m, z4.s, z7.s
+; CHECK-NEXT: movprfx z7, z6
+; CHECK-NEXT: udiv z7.s, p0/m, z7.s, z5.s
+; CHECK-NEXT: ext z5.b, { z2.b, z3.b }, #8
+; CHECK-NEXT: ext z6.b, { z0.b, z1.b }, #8
+; CHECK-NEXT: uunpklo z27.s, z23.h
+; CHECK-NEXT: ext z23.b, { z23.b, z24.b }, #8
+; CHECK-NEXT: uunpklo z19.h, z5.b
+; CHECK-NEXT: uunpklo z21.h, z6.b
+; CHECK-NEXT: uunpklo z23.s, z23.h
+; CHECK-NEXT: uunpklo z5.s, z19.h
+; CHECK-NEXT: uunpklo z6.s, z21.h
+; CHECK-NEXT: ext z19.b, { z19.b, z20.b }, #8
+; CHECK-NEXT: ext z20.b, { z21.b, z22.b }, #8
+; CHECK-NEXT: uunpklo z19.s, z19.h
+; CHECK-NEXT: movprfx z18, z6
+; CHECK-NEXT: udiv z18.s, p0/m, z18.s, z5.s
+; CHECK-NEXT: ldr q5, [x0]
; CHECK-NEXT: uunpklo z20.s, z20.h
-; CHECK-NEXT: ext z18.b, z18.b, z0.b, #8
-; CHECK-NEXT: uunpklo z18.h, z18.b
-; CHECK-NEXT: uunpklo z21.s, z18.h
-; CHECK-NEXT: ext z18.b, z18.b, z0.b, #8
-; CHECK-NEXT: uunpklo z18.s, z18.h
-; CHECK-NEXT: udivr z21.s, p0/m, z21.s, z22.s
-; CHECK-NEXT: uzp1 z22.h, z2.h, z2.h
-; CHECK-NEXT: uzp1 z23.h, z5.h, z5.h
-; CHECK-NEXT: uzp1 z5.h, z6.h, z6.h
-; CHECK-NEXT: uzp1 z6.h, z7.h, z7.h
-; CHECK-NEXT: udivr z18.s, p0/m, z18.s, z20.s
-; CHECK-NEXT: uzp1 z19.h, z19.h, z19.h
+; CHECK-NEXT: uunpklo z25.h, z5.b
+; CHECK-NEXT: udivr z19.s, p0/m, z19.s, z20.s
+; CHECK-NEXT: uzp1 z20.h, z4.h, z4.h
+; CHECK-NEXT: uzp1 z21.h, z7.h, z7.h
+; CHECK-NEXT: ext z24.b, { z25.b, z26.b }, #8
+; CHECK-NEXT: uunpklo z28.s, z25.h
+; CHECK-NEXT: ext z25.b, { z5.b, z6.b }, #8
+; CHECK-NEXT: uunpklo z24.s, z24.h
+; CHECK-NEXT: udivr z23.s, p0/m, z23.s, z24.s
+; CHECK-NEXT: ext z24.b, { z16.b, z17.b }, #8
+; CHECK-NEXT: udivr z27.s, p0/m, z27.s, z28.s
+; CHECK-NEXT: uunpklo z28.h, z24.b
+; CHECK-NEXT: uunpklo z24.h, z25.b
+; CHECK-NEXT: uunpklo z26.s, z28.h
+; CHECK-NEXT: uunpklo z30.s, z24.h
+; CHECK-NEXT: ext z28.b, { z28.b, z29.b }, #8
+; CHECK-NEXT: ext z24.b, { z24.b, z25.b }, #8
+; CHECK-NEXT: uunpklo z25.s, z28.h
+; CHECK-NEXT: uunpklo z24.s, z24.h
+; CHECK-NEXT: udivr z26.s, p0/m, z26.s, z30.s
+; CHECK-NEXT: uzp1 z27.h, z27.h, z27.h
+; CHECK-NEXT: uzp1 z28.h, z23.h, z23.h
+; CHECK-NEXT: udiv z24.s, p0/m, z24.s, z25.s
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z20.h, z17.h, z17.h
-; CHECK-NEXT: splice z7.h, p0, { z22.h, z23.h }
-; CHECK-NEXT: splice z5.h, p0, { z5.h, z6.h }
-; CHECK-NEXT: uzp1 z16.h, z21.h, z21.h
-; CHECK-NEXT: splice z2.h, p0, { z19.h, z20.h }
-; CHECK-NEXT: uzp1 z6.b, z7.b, z7.b
-; CHECK-NEXT: uzp1 z7.b, z5.b, z5.b
-; CHECK-NEXT: uzp1 z17.h, z18.h, z18.h
-; CHECK-NEXT: splice z16.h, p0, { z16.h, z17.h }
-; CHECK-NEXT: uzp1 z17.b, z2.b, z2.b
+; CHECK-NEXT: splice z4.h, p0, { z27.h, z28.h }
+; CHECK-NEXT: splice z7.h, p0, { z20.h, z21.h }
+; CHECK-NEXT: uzp1 z22.h, z26.h, z26.h
+; CHECK-NEXT: uzp1 z20.b, z4.b, z4.b
+; CHECK-NEXT: uzp1 z23.h, z24.h, z24.h
+; CHECK-NEXT: uzp1 z24.h, z18.h, z18.h
+; CHECK-NEXT: uzp1 z25.h, z19.h, z19.h
+; CHECK-NEXT: splice z18.h, p0, { z22.h, z23.h }
+; CHECK-NEXT: uzp1 z22.b, z7.b, z7.b
+; CHECK-NEXT: splice z19.h, p0, { z24.h, z25.h }
; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: splice z5.b, p0, { z6.b, z7.b }
-; CHECK-NEXT: uzp1 z18.b, z16.b, z16.b
-; CHECK-NEXT: splice z2.b, p0, { z17.b, z18.b }
+; CHECK-NEXT: uzp1 z21.b, z18.b, z18.b
+; CHECK-NEXT: uzp1 z23.b, z19.b, z19.b
+; CHECK-NEXT: splice z4.b, p0, { z20.b, z21.b }
+; CHECK-NEXT: splice z7.b, p0, { z22.b, z23.b }
; CHECK-NEXT: ptrue p0.b, vl16
-; CHECK-NEXT: mls z0.b, p0/m, z5.b, z1.b
-; CHECK-NEXT: msb z2.b, p0/m, z4.b, z3.b
-; CHECK-NEXT: stp q2, q0, [x0]
+; CHECK-NEXT: msb z4.b, p0/m, z16.b, z5.b
+; CHECK-NEXT: mls z0.b, p0/m, z7.b, z2.b
+; CHECK-NEXT: stp q4, q0, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: urem_v32i8:
@@ -1636,25 +1618,23 @@ define <4 x i16> @urem_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
define <8 x i16> @urem_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
; CHECK-LABEL: urem_v8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: uunpklo z2.s, z1.h
-; CHECK-NEXT: uunpklo z3.s, z0.h
+; CHECK-NEXT: mov z3.d, z0.d
+; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1_z2
+; CHECK-NEXT: uunpklo z0.s, z1.h
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: mov z4.d, z0.d
-; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT: mov z3.d, z1.d
-; CHECK-NEXT: ext z4.b, z4.b, z0.b, #8
-; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT: uunpklo z4.s, z4.h
-; CHECK-NEXT: uunpklo z3.s, z3.h
-; CHECK-NEXT: udivr z3.s, p0/m, z3.s, z4.s
+; CHECK-NEXT: uunpklo z5.s, z3.h
+; CHECK-NEXT: ext z6.b, { z3.b, z4.b }, #8
+; CHECK-NEXT: uunpklo z6.s, z6.h
+; CHECK-NEXT: udivr z0.s, p0/m, z0.s, z5.s
+; CHECK-NEXT: ext z5.b, { z1.b, z2.b }, #8
+; CHECK-NEXT: uunpklo z5.s, z5.h
+; CHECK-NEXT: udivr z5.s, p0/m, z5.s, z6.s
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z4.h, z2.h, z2.h
-; CHECK-NEXT: uzp1 z5.h, z3.h, z3.h
-; CHECK-NEXT: splice z2.h, p0, { z4.h, z5.h }
+; CHECK-NEXT: uzp1 z6.h, z0.h, z0.h
+; CHECK-NEXT: uzp1 z7.h, z5.h, z5.h
+; CHECK-NEXT: splice z0.h, p0, { z6.h, z7.h }
; CHECK-NEXT: ptrue p0.h, vl8
-; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h
+; CHECK-NEXT: msb z0.h, p0/m, z1.h, z3.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
@@ -1712,41 +1692,37 @@ define <8 x i16> @urem_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
define void @urem_v16i16(ptr %a, ptr %b) {
; CHECK-LABEL: urem_v16i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q4, q1, [x1]
+; CHECK-NEXT: ldp q16, q2, [x1]
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: ldr q0, [x0, #16]
-; CHECK-NEXT: uunpklo z2.s, z1.h
-; CHECK-NEXT: uunpklo z3.s, z0.h
-; CHECK-NEXT: uunpklo z5.s, z4.h
-; CHECK-NEXT: mov z16.d, z0.d
-; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT: ldr q3, [x0]
-; CHECK-NEXT: ext z16.b, z16.b, z0.b, #8
-; CHECK-NEXT: uunpklo z6.s, z3.h
-; CHECK-NEXT: mov z7.d, z3.d
-; CHECK-NEXT: uunpklo z16.s, z16.h
-; CHECK-NEXT: ext z7.b, z7.b, z0.b, #8
-; CHECK-NEXT: uunpklo z7.s, z7.h
-; CHECK-NEXT: udivr z5.s, p0/m, z5.s, z6.s
-; CHECK-NEXT: mov z6.d, z4.d
-; CHECK-NEXT: ext z6.b, z6.b, z0.b, #8
-; CHECK-NEXT: uunpklo z6.s, z6.h
-; CHECK-NEXT: udivr z6.s, p0/m, z6.s, z7.s
-; CHECK-NEXT: mov z7.d, z1.d
-; CHECK-NEXT: ext z7.b, z7.b, z0.b, #8
-; CHECK-NEXT: uunpklo z7.s, z7.h
-; CHECK-NEXT: udivr z7.s, p0/m, z7.s, z16.s
-; CHECK-NEXT: uzp1 z16.h, z5.h, z5.h
+; CHECK-NEXT: uunpklo z4.s, z2.h
+; CHECK-NEXT: uunpklo z5.s, z0.h
+; CHECK-NEXT: uunpklo z7.s, z16.h
+; CHECK-NEXT: ext z20.b, { z0.b, z1.b }, #8
+; CHECK-NEXT: udivr z4.s, p0/m, z4.s, z5.s
+; CHECK-NEXT: ldr q5, [x0]
+; CHECK-NEXT: uunpklo z20.s, z20.h
+; CHECK-NEXT: uunpklo z18.s, z5.h
+; CHECK-NEXT: ext z19.b, { z5.b, z6.b }, #8
+; CHECK-NEXT: uunpklo z19.s, z19.h
+; CHECK-NEXT: udivr z7.s, p0/m, z7.s, z18.s
+; CHECK-NEXT: ext z18.b, { z16.b, z17.b }, #8
+; CHECK-NEXT: uunpklo z18.s, z18.h
+; CHECK-NEXT: uzp1 z22.h, z4.h, z4.h
+; CHECK-NEXT: udivr z18.s, p0/m, z18.s, z19.s
+; CHECK-NEXT: ext z19.b, { z2.b, z3.b }, #8
+; CHECK-NEXT: uunpklo z19.s, z19.h
+; CHECK-NEXT: udivr z19.s, p0/m, z19.s, z20.s
+; CHECK-NEXT: uzp1 z20.h, z7.h, z7.h
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z17.h, z6.h, z6.h
-; CHECK-NEXT: uzp1 z5.h, z2.h, z2.h
-; CHECK-NEXT: splice z2.h, p0, { z16.h, z17.h }
-; CHECK-NEXT: uzp1 z6.h, z7.h, z7.h
-; CHECK-NEXT: splice z5.h, p0, { z5.h, z6.h }
+; CHECK-NEXT: uzp1 z21.h, z18.h, z18.h
+; CHECK-NEXT: splice z4.h, p0, { z20.h, z21.h }
+; CHECK-NEXT: uzp1 z23.h, z19.h, z19.h
+; CHECK-NEXT: splice z7.h, p0, { z22.h, z23.h }
; CHECK-NEXT: ptrue p0.h, vl8
-; CHECK-NEXT: msb z2.h, p0/m, z4.h, z3.h
-; CHECK-NEXT: mls z0.h, p0/m, z5.h, z1.h
-; CHECK-NEXT: stp q2, q0, [x0]
+; CHECK-NEXT: msb z4.h, p0/m, z16.h, z5.h
+; CHECK-NEXT: mls z0.h, p0/m, z7.h, z2.h
+; CHECK-NEXT: stp q4, q0, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: urem_v16i16:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll
index 3627390..bfa4bc2 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
@@ -97,17 +97,17 @@ entry:
define <2 x i32> @test2(ptr %arg1, ptr %arg2) {
; CHECK-LABEL: test2:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldp q1, q0, [x0, #32]
-; CHECK-NEXT: ldp q3, q4, [x0]
-; CHECK-NEXT: add z2.s, z0.s, z0.s
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: add z1.s, z1.s, z1.s
-; CHECK-NEXT: add z3.s, z3.s, z3.s
-; CHECK-NEXT: add z4.s, z4.s, z4.s
-; CHECK-NEXT: mov z0.s, s0
-; CHECK-NEXT: stp q1, q2, [x0, #32]
-; CHECK-NEXT: stp q3, q4, [x0]
+; CHECK-NEXT: ldp q2, q0, [x0, #32]
+; CHECK-NEXT: ldp q4, q5, [x0]
+; CHECK-NEXT: ext z3.b, { z0.b, z1.b }, #8
+; CHECK-NEXT: add z2.s, z2.s, z2.s
+; CHECK-NEXT: add z1.s, z0.s, z0.s
+; CHECK-NEXT: mov z0.s, s3
+; CHECK-NEXT: add z3.s, z4.s, z4.s
+; CHECK-NEXT: add z4.s, z5.s, z5.s
+; CHECK-NEXT: stp q2, q1, [x0, #32]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: stp q3, q4, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: test2:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reductions.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reductions.ll
index 93d6da1..1caf89f 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reductions.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reductions.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve < %s | FileCheck %s
; RUN: llc -mattr=+dotprod,+sve < %s | FileCheck %s -check-prefix=DOT
-; RUN: llc -mattr=+dotprod,+sve -force-streaming-compatible < %s | FileCheck %s --check-prefix=STREAMING-SVE
+; RUN: llc -mattr=+dotprod,+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefix=STREAMING-SVE
; RUN: llc -mattr=+dotprod,+sme -force-streaming < %s | FileCheck %s --check-prefix=STREAMING-SVE
target triple = "aarch64-unknown-linux-gnu"
@@ -36,34 +36,33 @@ define i32 @reduce_uaddv_v16i8(<32 x i8> %a) {
;
; STREAMING-SVE-LABEL: reduce_uaddv_v16i8:
; STREAMING-SVE: // %bb.0:
-; STREAMING-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
-; STREAMING-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
-; STREAMING-SVE-NEXT: uunpklo z2.h, z1.b
-; STREAMING-SVE-NEXT: uunpklo z3.h, z0.b
+; STREAMING-SVE-NEXT: mov z3.d, z0.d
+; STREAMING-SVE-NEXT: // kill: def $q1 killed $q1 def $z1_z2
+; STREAMING-SVE-NEXT: ext z0.b, { z1.b, z2.b }, #8
; STREAMING-SVE-NEXT: ptrue p0.s, vl4
-; STREAMING-SVE-NEXT: ext z1.b, z1.b, z0.b, #8
-; STREAMING-SVE-NEXT: ext z0.b, z0.b, z0.b, #8
; STREAMING-SVE-NEXT: uunpklo z1.h, z1.b
-; STREAMING-SVE-NEXT: uunpklo z0.h, z0.b
-; STREAMING-SVE-NEXT: uunpklo z4.s, z2.h
-; STREAMING-SVE-NEXT: uunpklo z6.s, z3.h
-; STREAMING-SVE-NEXT: mov z5.d, z1.d
-; STREAMING-SVE-NEXT: ext z2.b, z2.b, z0.b, #8
-; STREAMING-SVE-NEXT: ext z3.b, z3.b, z0.b, #8
-; STREAMING-SVE-NEXT: uunpklo z7.s, z0.h
+; STREAMING-SVE-NEXT: ext z5.b, { z3.b, z4.b }, #8
+; STREAMING-SVE-NEXT: uunpklo z6.h, z0.b
+; STREAMING-SVE-NEXT: uunpklo z3.h, z3.b
+; STREAMING-SVE-NEXT: ext z0.b, { z1.b, z2.b }, #8
; STREAMING-SVE-NEXT: uunpklo z1.s, z1.h
-; STREAMING-SVE-NEXT: add z4.s, z6.s, z4.s
-; STREAMING-SVE-NEXT: ext z5.b, z5.b, z0.b, #8
-; STREAMING-SVE-NEXT: ext z0.b, z0.b, z0.b, #8
-; STREAMING-SVE-NEXT: uunpklo z2.s, z2.h
-; STREAMING-SVE-NEXT: uunpklo z3.s, z3.h
-; STREAMING-SVE-NEXT: add z1.s, z7.s, z1.s
-; STREAMING-SVE-NEXT: uunpklo z5.s, z5.h
+; STREAMING-SVE-NEXT: uunpklo z16.h, z5.b
+; STREAMING-SVE-NEXT: ext z5.b, { z6.b, z7.b }, #8
+; STREAMING-SVE-NEXT: ext z19.b, { z3.b, z4.b }, #8
+; STREAMING-SVE-NEXT: uunpklo z2.s, z3.h
; STREAMING-SVE-NEXT: uunpklo z0.s, z0.h
-; STREAMING-SVE-NEXT: add z2.s, z3.s, z2.s
-; STREAMING-SVE-NEXT: add z1.s, z4.s, z1.s
-; STREAMING-SVE-NEXT: add z0.s, z0.s, z5.s
-; STREAMING-SVE-NEXT: add z0.s, z2.s, z0.s
+; STREAMING-SVE-NEXT: uunpklo z6.s, z6.h
+; STREAMING-SVE-NEXT: ext z18.b, { z16.b, z17.b }, #8
+; STREAMING-SVE-NEXT: uunpklo z3.s, z5.h
+; STREAMING-SVE-NEXT: uunpklo z5.s, z19.h
+; STREAMING-SVE-NEXT: uunpklo z7.s, z16.h
+; STREAMING-SVE-NEXT: add z1.s, z2.s, z1.s
+; STREAMING-SVE-NEXT: uunpklo z4.s, z18.h
+; STREAMING-SVE-NEXT: add z0.s, z5.s, z0.s
+; STREAMING-SVE-NEXT: add z2.s, z7.s, z6.s
+; STREAMING-SVE-NEXT: add z3.s, z4.s, z3.s
+; STREAMING-SVE-NEXT: add z1.s, z1.s, z2.s
+; STREAMING-SVE-NEXT: add z0.s, z0.s, z3.s
; STREAMING-SVE-NEXT: add z0.s, z1.s, z0.s
; STREAMING-SVE-NEXT: uaddv d0, p0, z0.s
; STREAMING-SVE-NEXT: fmov w0, s0
@@ -103,34 +102,33 @@ define i32 @reduce_saddv_v16i8(<32 x i8> %a) {
;
; STREAMING-SVE-LABEL: reduce_saddv_v16i8:
; STREAMING-SVE: // %bb.0:
-; STREAMING-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
-; STREAMING-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
-; STREAMING-SVE-NEXT: sunpklo z2.h, z1.b
-; STREAMING-SVE-NEXT: sunpklo z3.h, z0.b
+; STREAMING-SVE-NEXT: mov z3.d, z0.d
+; STREAMING-SVE-NEXT: // kill: def $q1 killed $q1 def $z1_z2
+; STREAMING-SVE-NEXT: ext z0.b, { z1.b, z2.b }, #8
; STREAMING-SVE-NEXT: ptrue p0.s, vl4
-; STREAMING-SVE-NEXT: ext z1.b, z1.b, z0.b, #8
-; STREAMING-SVE-NEXT: ext z0.b, z0.b, z0.b, #8
; STREAMING-SVE-NEXT: sunpklo z1.h, z1.b
-; STREAMING-SVE-NEXT: sunpklo z0.h, z0.b
-; STREAMING-SVE-NEXT: sunpklo z4.s, z2.h
-; STREAMING-SVE-NEXT: sunpklo z6.s, z3.h
-; STREAMING-SVE-NEXT: mov z5.d, z1.d
-; STREAMING-SVE-NEXT: ext z2.b, z2.b, z0.b, #8
-; STREAMING-SVE-NEXT: ext z3.b, z3.b, z0.b, #8
-; STREAMING-SVE-NEXT: sunpklo z7.s, z0.h
+; STREAMING-SVE-NEXT: ext z5.b, { z3.b, z4.b }, #8
+; STREAMING-SVE-NEXT: sunpklo z6.h, z0.b
+; STREAMING-SVE-NEXT: sunpklo z3.h, z3.b
+; STREAMING-SVE-NEXT: ext z0.b, { z1.b, z2.b }, #8
; STREAMING-SVE-NEXT: sunpklo z1.s, z1.h
-; STREAMING-SVE-NEXT: add z4.s, z6.s, z4.s
-; STREAMING-SVE-NEXT: ext z5.b, z5.b, z0.b, #8
-; STREAMING-SVE-NEXT: ext z0.b, z0.b, z0.b, #8
-; STREAMING-SVE-NEXT: sunpklo z2.s, z2.h
-; STREAMING-SVE-NEXT: sunpklo z3.s, z3.h
-; STREAMING-SVE-NEXT: add z1.s, z7.s, z1.s
-; STREAMING-SVE-NEXT: sunpklo z5.s, z5.h
+; STREAMING-SVE-NEXT: sunpklo z16.h, z5.b
+; STREAMING-SVE-NEXT: ext z5.b, { z6.b, z7.b }, #8
+; STREAMING-SVE-NEXT: ext z19.b, { z3.b, z4.b }, #8
+; STREAMING-SVE-NEXT: sunpklo z2.s, z3.h
; STREAMING-SVE-NEXT: sunpklo z0.s, z0.h
-; STREAMING-SVE-NEXT: add z2.s, z3.s, z2.s
-; STREAMING-SVE-NEXT: add z1.s, z4.s, z1.s
-; STREAMING-SVE-NEXT: add z0.s, z0.s, z5.s
-; STREAMING-SVE-NEXT: add z0.s, z2.s, z0.s
+; STREAMING-SVE-NEXT: sunpklo z6.s, z6.h
+; STREAMING-SVE-NEXT: ext z18.b, { z16.b, z17.b }, #8
+; STREAMING-SVE-NEXT: sunpklo z3.s, z5.h
+; STREAMING-SVE-NEXT: sunpklo z5.s, z19.h
+; STREAMING-SVE-NEXT: sunpklo z7.s, z16.h
+; STREAMING-SVE-NEXT: add z1.s, z2.s, z1.s
+; STREAMING-SVE-NEXT: sunpklo z4.s, z18.h
+; STREAMING-SVE-NEXT: add z0.s, z5.s, z0.s
+; STREAMING-SVE-NEXT: add z2.s, z7.s, z6.s
+; STREAMING-SVE-NEXT: add z3.s, z4.s, z3.s
+; STREAMING-SVE-NEXT: add z1.s, z1.s, z2.s
+; STREAMING-SVE-NEXT: add z0.s, z0.s, z3.s
; STREAMING-SVE-NEXT: add z0.s, z1.s, z0.s
; STREAMING-SVE-NEXT: uaddv d0, p0, z0.s
; STREAMING-SVE-NEXT: fmov w0, s0
diff --git a/llvm/test/CodeGen/AArch64/sve-vector-splice.ll b/llvm/test/CodeGen/AArch64/sve-vector-splice.ll
new file mode 100644
index 0000000..5d2a125
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-vector-splice.ll
@@ -0,0 +1,253 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mattr=+sve -verify-machineinstrs < %s | FileCheck %s --check-prefixes=SVE
+; RUN: llc -mattr=+sve2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=SVE2
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; Test vector_splice patterns.
+; Note that this test is similar to named-vector-shuffles-sve.ll, but it focuses
+; on covering every supported element type with a positive "splice index".
+
+
+; i8 elements: a splice index of 1 is selected as EXT byte offset #1.
+define <vscale x 16 x i8> @splice_nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; SVE-LABEL: splice_nxv16i8:
+; SVE: // %bb.0:
+; SVE-NEXT: ext z0.b, z0.b, z1.b, #1
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: splice_nxv16i8:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SVE2-NEXT: ext z0.b, { z0.b, z1.b }, #1
+; SVE2-NEXT: ret
+ %res = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 1)
+ ret <vscale x 16 x i8> %res
+}
+
+; i16 elements: a splice index of 1 is selected as EXT byte offset #2.
+define <vscale x 8 x i16> @splice_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; SVE-LABEL: splice_nxv8i16:
+; SVE: // %bb.0:
+; SVE-NEXT: ext z0.b, z0.b, z1.b, #2
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: splice_nxv8i16:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SVE2-NEXT: ext z0.b, { z0.b, z1.b }, #2
+; SVE2-NEXT: ret
+ %res = call <vscale x 8 x i16> @llvm.vector.splice.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 1)
+ ret <vscale x 8 x i16> %res
+}
+
+; bf16 elements: index 1 gives EXT byte offset #2 (nxv8), #4 (nxv4), #8 (nxv2).
+
+define <vscale x 8 x bfloat> @splice_nxv8bfloat(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) {
+; SVE-LABEL: splice_nxv8bfloat:
+; SVE: // %bb.0:
+; SVE-NEXT: ext z0.b, z0.b, z1.b, #2
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: splice_nxv8bfloat:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SVE2-NEXT: ext z0.b, { z0.b, z1.b }, #2
+; SVE2-NEXT: ret
+ %res = call <vscale x 8 x bfloat> @llvm.vector.splice.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, i32 1)
+ ret <vscale x 8 x bfloat> %res
+}
+
+define <vscale x 4 x bfloat> @splice_nxv4bfloat(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) {
+; SVE-LABEL: splice_nxv4bfloat:
+; SVE: // %bb.0:
+; SVE-NEXT: ext z0.b, z0.b, z1.b, #4
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: splice_nxv4bfloat:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SVE2-NEXT: ext z0.b, { z0.b, z1.b }, #4
+; SVE2-NEXT: ret
+ %res = call <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b, i32 1)
+ ret <vscale x 4 x bfloat> %res
+}
+
+define <vscale x 2 x bfloat> @splice_nxv2bfloat(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) {
+; SVE-LABEL: splice_nxv2bfloat:
+; SVE: // %bb.0:
+; SVE-NEXT: ext z0.b, z0.b, z1.b, #8
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: splice_nxv2bfloat:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SVE2-NEXT: ext z0.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: ret
+ %res = call <vscale x 2 x bfloat> @llvm.vector.splice.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b, i32 1)
+ ret <vscale x 2 x bfloat> %res
+}
+
+; f16 elements: index 1 gives EXT byte offset #2 (nxv8), #4 (nxv4), #8 (nxv2).
+
+define <vscale x 8 x half> @splice_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; SVE-LABEL: splice_nxv8f16:
+; SVE: // %bb.0:
+; SVE-NEXT: ext z0.b, z0.b, z1.b, #2
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: splice_nxv8f16:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SVE2-NEXT: ext z0.b, { z0.b, z1.b }, #2
+; SVE2-NEXT: ret
+ %res = call <vscale x 8 x half> @llvm.vector.splice.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i32 1)
+ ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x half> @splice_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
+; SVE-LABEL: splice_nxv4f16:
+; SVE: // %bb.0:
+; SVE-NEXT: ext z0.b, z0.b, z1.b, #4
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: splice_nxv4f16:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SVE2-NEXT: ext z0.b, { z0.b, z1.b }, #4
+; SVE2-NEXT: ret
+ %res = call <vscale x 4 x half> @llvm.vector.splice.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, i32 1)
+ ret <vscale x 4 x half> %res
+}
+
+define <vscale x 2 x half> @splice_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
+; SVE-LABEL: splice_nxv2f16:
+; SVE: // %bb.0:
+; SVE-NEXT: ext z0.b, z0.b, z1.b, #8
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: splice_nxv2f16:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SVE2-NEXT: ext z0.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: ret
+ %res = call <vscale x 2 x half> @llvm.vector.splice.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, i32 1)
+ ret <vscale x 2 x half> %res
+}
+
+; i32 elements: a splice index of 1 is selected as EXT byte offset #4.
+define <vscale x 4 x i32> @splice_nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; SVE-LABEL: splice_nxv4i32:
+; SVE: // %bb.0:
+; SVE-NEXT: ext z0.b, z0.b, z1.b, #4
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: splice_nxv4i32:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SVE2-NEXT: ext z0.b, { z0.b, z1.b }, #4
+; SVE2-NEXT: ret
+ %res = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 1)
+ ret <vscale x 4 x i32> %res
+}
+
+; f32 elements: index 1 gives EXT byte offset #4 (nxv4) and #8 (nxv2).
+
+define <vscale x 4 x float> @splice_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; SVE-LABEL: splice_nxv4f32:
+; SVE: // %bb.0:
+; SVE-NEXT: ext z0.b, z0.b, z1.b, #4
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: splice_nxv4f32:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SVE2-NEXT: ext z0.b, { z0.b, z1.b }, #4
+; SVE2-NEXT: ret
+ %res = call <vscale x 4 x float> @llvm.vector.splice.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i32 1)
+ ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x float> @splice_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
+; SVE-LABEL: splice_nxv2f32:
+; SVE: // %bb.0:
+; SVE-NEXT: ext z0.b, z0.b, z1.b, #8
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: splice_nxv2f32:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SVE2-NEXT: ext z0.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: ret
+ %res = call <vscale x 2 x float> @llvm.vector.splice.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, i32 1)
+ ret <vscale x 2 x float> %res
+}
+
+; i64 elements: a splice index of 1 is selected as EXT byte offset #8.
+define <vscale x 2 x i64> @splice_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; SVE-LABEL: splice_nxv2i64:
+; SVE: // %bb.0:
+; SVE-NEXT: ext z0.b, z0.b, z1.b, #8
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: splice_nxv2i64:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SVE2-NEXT: ext z0.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: ret
+ %res = call <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 1)
+ ret <vscale x 2 x i64> %res
+}
+
+; f64 elements: a splice index of 1 is selected as EXT byte offset #8.
+define <vscale x 2 x double> @splice_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; SVE-LABEL: splice_nxv2f64:
+; SVE: // %bb.0:
+; SVE-NEXT: ext z0.b, z0.b, z1.b, #8
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: splice_nxv2f64:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; SVE2-NEXT: ext z0.b, { z0.b, z1.b }, #8
+; SVE2-NEXT: ret
+ %res = call <vscale x 2 x double> @llvm.vector.splice.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, i32 1)
+ ret <vscale x 2 x double> %res
+}
+
+declare <vscale x 2 x i1> @llvm.vector.splice.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>, i32)
+declare <vscale x 4 x i1> @llvm.vector.splice.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>, i32)
+declare <vscale x 8 x i1> @llvm.vector.splice.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>, i32)
+declare <vscale x 16 x i1> @llvm.vector.splice.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, i32)
+
+declare <vscale x 2 x i8> @llvm.vector.splice.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, i32)
+declare <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
+declare <vscale x 8 x i16> @llvm.vector.splice.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
+declare <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
+declare <vscale x 8 x i32> @llvm.vector.splice.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, i32)
+declare <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 2 x half> @llvm.vector.splice.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>, i32)
+declare <vscale x 4 x half> @llvm.vector.splice.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>, i32)
+declare <vscale x 8 x half> @llvm.vector.splice.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, i32)
+declare <vscale x 2 x float> @llvm.vector.splice.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, i32)
+declare <vscale x 4 x float> @llvm.vector.splice.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, i32)
+declare <vscale x 16 x float> @llvm.vector.splice.nxv16f32(<vscale x 16 x float>, <vscale x 16 x float>, i32)
+declare <vscale x 2 x double> @llvm.vector.splice.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, i32)
+
+declare <vscale x 2 x bfloat> @llvm.vector.splice.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, i32)
+declare <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>, i32)
+declare <vscale x 8 x bfloat> @llvm.vector.splice.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)
diff --git a/llvm/test/CodeGen/AArch64/sve2-fixed-length-extract-subvector.ll b/llvm/test/CodeGen/AArch64/sve2-fixed-length-extract-subvector.ll
index b96fad8..6fd3aff 100644
--- a/llvm/test/CodeGen/AArch64/sve2-fixed-length-extract-subvector.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-fixed-length-extract-subvector.ll
@@ -52,9 +52,8 @@ define void @extract_v4i64_halves(ptr %in, ptr %out, ptr %out2) vscale_range(2,2
; CHECK-LABEL: extract_v4i64_halves:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr z0, [x0]
-; CHECK-NEXT: mov z1.d, z0.d
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16
-; CHECK-NEXT: str q1, [x1]
+; CHECK-NEXT: ext z2.b, { z0.b, z1.b }, #16
+; CHECK-NEXT: str q2, [x1]
; CHECK-NEXT: str q0, [x2]
; CHECK-NEXT: ret
entry:
@@ -70,9 +69,8 @@ define void @extract_v4double_halves(ptr %in, ptr %out, ptr %out2) vscale_range(
; CHECK-LABEL: extract_v4double_halves:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr z0, [x0]
-; CHECK-NEXT: mov z1.d, z0.d
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16
-; CHECK-NEXT: str q1, [x1]
+; CHECK-NEXT: ext z2.b, { z0.b, z1.b }, #16
+; CHECK-NEXT: str q2, [x1]
; CHECK-NEXT: str q0, [x2]
; CHECK-NEXT: ret
entry:
@@ -88,9 +86,8 @@ define void @extract_v8i32_halves(ptr %in, ptr %out, ptr %out2) vscale_range(2,2
; CHECK-LABEL: extract_v8i32_halves:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr z0, [x0]
-; CHECK-NEXT: mov z1.d, z0.d
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16
-; CHECK-NEXT: str q1, [x1]
+; CHECK-NEXT: ext z2.b, { z0.b, z1.b }, #16
+; CHECK-NEXT: str q2, [x1]
; CHECK-NEXT: str q0, [x2]
; CHECK-NEXT: ret
entry:
@@ -110,9 +107,8 @@ define void @extract_v8i32_halves_intrinsic(ptr %in, ptr %out, ptr %out2) vscale
; CHECK-LABEL: extract_v8i32_halves_intrinsic:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr z0, [x0]
-; CHECK-NEXT: mov z1.d, z0.d
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16
-; CHECK-NEXT: str q1, [x1]
+; CHECK-NEXT: ext z2.b, { z0.b, z1.b }, #16
+; CHECK-NEXT: str q2, [x1]
; CHECK-NEXT: str q0, [x2]
; CHECK-NEXT: ret
entry:
@@ -128,9 +124,8 @@ define void @extract_v8float_halves(ptr %in, ptr %out, ptr %out2) vscale_range(2
; CHECK-LABEL: extract_v8float_halves:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr z0, [x0]
-; CHECK-NEXT: mov z1.d, z0.d
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16
-; CHECK-NEXT: str q1, [x1]
+; CHECK-NEXT: ext z2.b, { z0.b, z1.b }, #16
+; CHECK-NEXT: str q2, [x1]
; CHECK-NEXT: str q0, [x2]
; CHECK-NEXT: ret
entry:
@@ -146,9 +141,8 @@ define void @extract_v8i32_half_unaligned(<8 x i32> %unused, ptr %in, ptr %out)
; CHECK-LABEL: extract_v8i32_half_unaligned:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr z0, [x0]
-; CHECK-NEXT: mov z1.d, z0.d
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16
-; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #8
+; CHECK-NEXT: ext z2.b, { z0.b, z1.b }, #16
+; CHECK-NEXT: ext v0.16b, v0.16b, v2.16b, #8
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
entry:
@@ -162,15 +156,13 @@ define void @extract_v8i32_quarters(ptr %in, ptr %out, ptr %out2, ptr %out3, ptr
; CHECK-LABEL: extract_v8i32_quarters:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr z0, [x0]
-; CHECK-NEXT: mov z1.d, z0.d
-; CHECK-NEXT: mov z2.d, z0.d
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16
-; CHECK-NEXT: ext z2.b, z2.b, z0.b, #24
-; CHECK-NEXT: str d1, [x1]
-; CHECK-NEXT: str d2, [x2]
+; CHECK-NEXT: ext z2.b, { z0.b, z1.b }, #16
+; CHECK-NEXT: ext z3.b, { z0.b, z1.b }, #24
+; CHECK-NEXT: ext z4.b, { z0.b, z1.b }, #8
+; CHECK-NEXT: str d2, [x1]
+; CHECK-NEXT: str d3, [x2]
; CHECK-NEXT: str d0, [x3]
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: str d0, [x4]
+; CHECK-NEXT: str d4, [x4]
; CHECK-NEXT: ret
entry:
%b = load <8 x i32>, ptr %in
@@ -189,9 +181,8 @@ define void @extract_v16i16_halves(ptr %in, ptr %out, ptr %out2) vscale_range(2,
; CHECK-LABEL: extract_v16i16_halves:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr z0, [x0]
-; CHECK-NEXT: mov z1.d, z0.d
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16
-; CHECK-NEXT: str q1, [x1]
+; CHECK-NEXT: ext z2.b, { z0.b, z1.b }, #16
+; CHECK-NEXT: str q2, [x1]
; CHECK-NEXT: str q0, [x2]
; CHECK-NEXT: ret
entry:
@@ -223,9 +214,8 @@ define void @extract_v16half_halves(ptr %in, ptr %out, ptr %out2) vscale_range(2
; CHECK-LABEL: extract_v16half_halves:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr z0, [x0]
-; CHECK-NEXT: mov z1.d, z0.d
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16
-; CHECK-NEXT: str q1, [x1]
+; CHECK-NEXT: ext z2.b, { z0.b, z1.b }, #16
+; CHECK-NEXT: str q2, [x1]
; CHECK-NEXT: str q0, [x2]
; CHECK-NEXT: ret
entry:
@@ -241,9 +231,8 @@ define void @extract_v32i8_halves(ptr %in, ptr %out, ptr %out2) vscale_range(2,2
; CHECK-LABEL: extract_v32i8_halves:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr z0, [x0]
-; CHECK-NEXT: mov z1.d, z0.d
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16
-; CHECK-NEXT: str q1, [x1]
+; CHECK-NEXT: ext z2.b, { z0.b, z1.b }, #16
+; CHECK-NEXT: str q2, [x1]
; CHECK-NEXT: str q0, [x2]
; CHECK-NEXT: ret
entry:
@@ -264,9 +253,8 @@ define void @extract_v8i64_halves(ptr %in, ptr %out, ptr %out2) vscale_range(4,4
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: ptrue p0.d, vl4
-; CHECK-NEXT: mov z1.d, z0.d
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #32
-; CHECK-NEXT: st1d { z1.d }, p0, [x1]
+; CHECK-NEXT: ext z2.b, { z0.b, z1.b }, #32
+; CHECK-NEXT: st1d { z2.d }, p0, [x1]
; CHECK-NEXT: st1d { z0.d }, p0, [x2]
; CHECK-NEXT: ret
entry:
@@ -283,9 +271,8 @@ define void @extract_v16i32_halves(ptr %in, ptr %out, ptr %out2) vscale_range(4,
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: ptrue p0.s, vl8
-; CHECK-NEXT: mov z1.d, z0.d
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #32
-; CHECK-NEXT: st1w { z1.s }, p0, [x1]
+; CHECK-NEXT: ext z2.b, { z0.b, z1.b }, #32
+; CHECK-NEXT: st1w { z2.s }, p0, [x1]
; CHECK-NEXT: st1w { z0.s }, p0, [x2]
; CHECK-NEXT: ret
entry:
@@ -302,9 +289,8 @@ define void @extract_v32i16_halves(ptr %in, ptr %out, ptr %out2) vscale_range(4,
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: ptrue p0.h, vl16
-; CHECK-NEXT: mov z1.d, z0.d
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #32
-; CHECK-NEXT: st1h { z1.h }, p0, [x1]
+; CHECK-NEXT: ext z2.b, { z0.b, z1.b }, #32
+; CHECK-NEXT: st1h { z2.h }, p0, [x1]
; CHECK-NEXT: st1h { z0.h }, p0, [x2]
; CHECK-NEXT: ret
entry:
@@ -322,9 +308,8 @@ define void @extract_v64i8_halves(ptr %in, ptr %out, ptr %out2) vscale_range(4,4
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: ptrue p0.b, vl32
-; CHECK-NEXT: mov z1.d, z0.d
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #32
-; CHECK-NEXT: st1b { z1.b }, p0, [x1]
+; CHECK-NEXT: ext z2.b, { z0.b, z1.b }, #32
+; CHECK-NEXT: st1b { z2.b }, p0, [x1]
; CHECK-NEXT: st1b { z0.b }, p0, [x2]
; CHECK-NEXT: ret
entry: