Diffstat (limited to 'llvm/test/CodeGen/AArch64/sme2-intrinsics-ldnt1.ll')
 llvm/test/CodeGen/AArch64/sme2-intrinsics-ldnt1.ll | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-ldnt1.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-ldnt1.ll
index 1fb251a..7e2d28f 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-ldnt1.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-ldnt1.ll
@@ -82,7 +82,6 @@ define <vscale x 32 x i8> @ldnt1_x2_i8_z0_z8(<vscale x 16 x i8> %unused, <vscale
; CONTIGUOUS-NEXT: ldr z0, [sp]
; CONTIGUOUS-NEXT: ldr z1, [sp, #1, mul vl]
; CONTIGUOUS-NEXT: addvl sp, sp, #2
-; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -98,6 +97,7 @@ define <vscale x 32 x i8> @ldnt1_x2_i8_z0_z8(<vscale x 16 x i8> %unused, <vscale
; CONTIGUOUS-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CONTIGUOUS-NEXT: addvl sp, sp, #16
; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CONTIGUOUS-NEXT: ret
@@ -190,7 +190,6 @@ define <vscale x 32 x i8> @ldnt1_x2_i8_z0_z8_scalar(<vscale x 16 x i8> %unused,
; CONTIGUOUS-NEXT: ldr z0, [sp]
; CONTIGUOUS-NEXT: ldr z1, [sp, #1, mul vl]
; CONTIGUOUS-NEXT: addvl sp, sp, #2
-; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -206,6 +205,7 @@ define <vscale x 32 x i8> @ldnt1_x2_i8_z0_z8_scalar(<vscale x 16 x i8> %unused,
; CONTIGUOUS-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CONTIGUOUS-NEXT: addvl sp, sp, #16
; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CONTIGUOUS-NEXT: ret
@@ -299,7 +299,6 @@ define <vscale x 16 x i16> @ldnt1_x2_i16_z0_z8(<vscale x 8 x i16> %unused, <vsca
; CONTIGUOUS-NEXT: ldr z0, [sp]
; CONTIGUOUS-NEXT: ldr z1, [sp, #1, mul vl]
; CONTIGUOUS-NEXT: addvl sp, sp, #2
-; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -315,6 +314,7 @@ define <vscale x 16 x i16> @ldnt1_x2_i16_z0_z8(<vscale x 8 x i16> %unused, <vsca
; CONTIGUOUS-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CONTIGUOUS-NEXT: addvl sp, sp, #16
; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CONTIGUOUS-NEXT: ret
@@ -407,7 +407,6 @@ define <vscale x 16 x i16> @ldnt1_x2_i16_z0_z8_scalar(<vscale x 8 x i16> %unused
; CONTIGUOUS-NEXT: ldr z0, [sp]
; CONTIGUOUS-NEXT: ldr z1, [sp, #1, mul vl]
; CONTIGUOUS-NEXT: addvl sp, sp, #2
-; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -423,6 +422,7 @@ define <vscale x 16 x i16> @ldnt1_x2_i16_z0_z8_scalar(<vscale x 8 x i16> %unused
; CONTIGUOUS-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CONTIGUOUS-NEXT: addvl sp, sp, #16
; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CONTIGUOUS-NEXT: ret
@@ -516,7 +516,6 @@ define <vscale x 8 x i32> @ldnt1_x2_i32_z0_z8(<vscale x 4 x i32> %unused, <vscal
; CONTIGUOUS-NEXT: ldr z0, [sp]
; CONTIGUOUS-NEXT: ldr z1, [sp, #1, mul vl]
; CONTIGUOUS-NEXT: addvl sp, sp, #2
-; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -532,6 +531,7 @@ define <vscale x 8 x i32> @ldnt1_x2_i32_z0_z8(<vscale x 4 x i32> %unused, <vscal
; CONTIGUOUS-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CONTIGUOUS-NEXT: addvl sp, sp, #16
; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CONTIGUOUS-NEXT: ret
@@ -624,7 +624,6 @@ define <vscale x 8 x i32> @ldnt1_x2_i32_z0_z8_scalar(<vscale x 4 x i32> %unused,
; CONTIGUOUS-NEXT: ldr z0, [sp]
; CONTIGUOUS-NEXT: ldr z1, [sp, #1, mul vl]
; CONTIGUOUS-NEXT: addvl sp, sp, #2
-; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -640,6 +639,7 @@ define <vscale x 8 x i32> @ldnt1_x2_i32_z0_z8_scalar(<vscale x 4 x i32> %unused,
; CONTIGUOUS-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CONTIGUOUS-NEXT: addvl sp, sp, #16
; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CONTIGUOUS-NEXT: ret
@@ -733,7 +733,6 @@ define <vscale x 4 x i64> @ldnt1_x2_i64_z0_z8(<vscale x 2 x i64> %unused, <vscal
; CONTIGUOUS-NEXT: ldr z0, [sp]
; CONTIGUOUS-NEXT: ldr z1, [sp, #1, mul vl]
; CONTIGUOUS-NEXT: addvl sp, sp, #2
-; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -749,6 +748,7 @@ define <vscale x 4 x i64> @ldnt1_x2_i64_z0_z8(<vscale x 2 x i64> %unused, <vscal
; CONTIGUOUS-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CONTIGUOUS-NEXT: addvl sp, sp, #16
; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CONTIGUOUS-NEXT: ret
@@ -841,7 +841,6 @@ define <vscale x 4 x i64> @ldnt1_x2_i64_z0_z8_scalar(<vscale x 2 x i64> %unused,
; CONTIGUOUS-NEXT: ldr z0, [sp]
; CONTIGUOUS-NEXT: ldr z1, [sp, #1, mul vl]
; CONTIGUOUS-NEXT: addvl sp, sp, #2
-; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -857,6 +856,7 @@ define <vscale x 4 x i64> @ldnt1_x2_i64_z0_z8_scalar(<vscale x 2 x i64> %unused,
; CONTIGUOUS-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CONTIGUOUS-NEXT: addvl sp, sp, #16
; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CONTIGUOUS-NEXT: ret
@@ -955,7 +955,6 @@ define <vscale x 64 x i8> @ldnt1_x4_i8_z0_z4_z8_z12(<vscale x 16 x i8> %unused,
; CONTIGUOUS-NEXT: ldr z2, [sp, #2, mul vl]
; CONTIGUOUS-NEXT: ldr z3, [sp, #3, mul vl]
; CONTIGUOUS-NEXT: addvl sp, sp, #4
-; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -970,6 +969,7 @@ define <vscale x 64 x i8> @ldnt1_x4_i8_z0_z4_z8_z12(<vscale x 16 x i8> %unused,
; CONTIGUOUS-NEXT: ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CONTIGUOUS-NEXT: addvl sp, sp, #15
; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CONTIGUOUS-NEXT: ret
@@ -1071,7 +1071,6 @@ define <vscale x 64 x i8> @ldnt1_x4_i8_z0_z4_z8_z12_scalar(<vscale x 16 x i8> %u
; CONTIGUOUS-NEXT: ldr z2, [sp, #2, mul vl]
; CONTIGUOUS-NEXT: ldr z3, [sp, #3, mul vl]
; CONTIGUOUS-NEXT: addvl sp, sp, #4
-; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -1086,6 +1085,7 @@ define <vscale x 64 x i8> @ldnt1_x4_i8_z0_z4_z8_z12_scalar(<vscale x 16 x i8> %u
; CONTIGUOUS-NEXT: ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CONTIGUOUS-NEXT: addvl sp, sp, #15
; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CONTIGUOUS-NEXT: ret
@@ -1188,7 +1188,6 @@ define <vscale x 32 x i16> @ldnt1_x4_i16_z0_z4_z8_z12(<vscale x 8 x i16> %unused
; CONTIGUOUS-NEXT: ldr z2, [sp, #2, mul vl]
; CONTIGUOUS-NEXT: ldr z3, [sp, #3, mul vl]
; CONTIGUOUS-NEXT: addvl sp, sp, #4
-; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -1203,6 +1202,7 @@ define <vscale x 32 x i16> @ldnt1_x4_i16_z0_z4_z8_z12(<vscale x 8 x i16> %unused
; CONTIGUOUS-NEXT: ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CONTIGUOUS-NEXT: addvl sp, sp, #15
; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CONTIGUOUS-NEXT: ret
@@ -1304,7 +1304,6 @@ define <vscale x 32 x i16> @ldnt1_x4_i16_z0_z4_z8_z12_scalar(<vscale x 8 x i16>
; CONTIGUOUS-NEXT: ldr z2, [sp, #2, mul vl]
; CONTIGUOUS-NEXT: ldr z3, [sp, #3, mul vl]
; CONTIGUOUS-NEXT: addvl sp, sp, #4
-; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -1319,6 +1318,7 @@ define <vscale x 32 x i16> @ldnt1_x4_i16_z0_z4_z8_z12_scalar(<vscale x 8 x i16>
; CONTIGUOUS-NEXT: ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CONTIGUOUS-NEXT: addvl sp, sp, #15
; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CONTIGUOUS-NEXT: ret
@@ -1421,7 +1421,6 @@ define <vscale x 16 x i32> @ldnt1_x4_i32_z0_z4_z8_z12(<vscale x 4 x i32> %unused
; CONTIGUOUS-NEXT: ldr z2, [sp, #2, mul vl]
; CONTIGUOUS-NEXT: ldr z3, [sp, #3, mul vl]
; CONTIGUOUS-NEXT: addvl sp, sp, #4
-; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -1436,6 +1435,7 @@ define <vscale x 16 x i32> @ldnt1_x4_i32_z0_z4_z8_z12(<vscale x 4 x i32> %unused
; CONTIGUOUS-NEXT: ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CONTIGUOUS-NEXT: addvl sp, sp, #15
; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CONTIGUOUS-NEXT: ret
@@ -1537,7 +1537,6 @@ define <vscale x 16 x i32> @ldnt1_x4_i32_z0_z4_z8_z12_scalar(<vscale x 4 x i32>
; CONTIGUOUS-NEXT: ldr z2, [sp, #2, mul vl]
; CONTIGUOUS-NEXT: ldr z3, [sp, #3, mul vl]
; CONTIGUOUS-NEXT: addvl sp, sp, #4
-; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -1552,6 +1551,7 @@ define <vscale x 16 x i32> @ldnt1_x4_i32_z0_z4_z8_z12_scalar(<vscale x 4 x i32>
; CONTIGUOUS-NEXT: ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CONTIGUOUS-NEXT: addvl sp, sp, #15
; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CONTIGUOUS-NEXT: ret
@@ -1654,7 +1654,6 @@ define <vscale x 8 x i64> @ldnt1_x4_i64_z0_z4_z8_z12(<vscale x 2 x i64> %unused,
; CONTIGUOUS-NEXT: ldr z2, [sp, #2, mul vl]
; CONTIGUOUS-NEXT: ldr z3, [sp, #3, mul vl]
; CONTIGUOUS-NEXT: addvl sp, sp, #4
-; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -1669,6 +1668,7 @@ define <vscale x 8 x i64> @ldnt1_x4_i64_z0_z4_z8_z12(<vscale x 2 x i64> %unused,
; CONTIGUOUS-NEXT: ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CONTIGUOUS-NEXT: addvl sp, sp, #15
; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CONTIGUOUS-NEXT: ret
@@ -1770,7 +1770,6 @@ define <vscale x 8 x i64> @ldnt1_x4_i64_z0_z4_z8_z12_scalar(<vscale x 2 x i64> %
; CONTIGUOUS-NEXT: ldr z2, [sp, #2, mul vl]
; CONTIGUOUS-NEXT: ldr z3, [sp, #3, mul vl]
; CONTIGUOUS-NEXT: addvl sp, sp, #4
-; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -1785,6 +1784,7 @@ define <vscale x 8 x i64> @ldnt1_x4_i64_z0_z4_z8_z12_scalar(<vscale x 2 x i64> %
; CONTIGUOUS-NEXT: ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
; CONTIGUOUS-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
+; CONTIGUOUS-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CONTIGUOUS-NEXT: addvl sp, sp, #15
; CONTIGUOUS-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CONTIGUOUS-NEXT: ret