aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll')
-rw-r--r--llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll518
1 files changed, 302 insertions, 216 deletions
diff --git a/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll b/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll
index 1d1bae4..500c511 100644
--- a/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll
+++ b/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll
@@ -16,11 +16,12 @@ define void @dont_coalesce_arg_i8(i8 %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: fmov s0, w0
; CHECK-NEXT: mov x19, x1
@@ -28,12 +29,12 @@ define void @dont_coalesce_arg_i8(i8 %arg, ptr %ptr) #0 {
; CHECK-NEXT: smstop sm
; CHECK-NEXT: bl use_i8
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: st1b { z0.b }, p0, [x19]
; CHECK-NEXT: addvl sp, sp, #1
-; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
@@ -49,11 +50,12 @@ define void @dont_coalesce_arg_i16(i16 %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: fmov s0, w0
; CHECK-NEXT: mov x19, x1
@@ -61,12 +63,12 @@ define void @dont_coalesce_arg_i16(i16 %arg, ptr %ptr) #0 {
; CHECK-NEXT: smstop sm
; CHECK-NEXT: bl use_i16
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: st1h { z0.h }, p0, [x19]
; CHECK-NEXT: addvl sp, sp, #1
-; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
@@ -82,11 +84,12 @@ define void @dont_coalesce_arg_i32(i32 %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: fmov s0, w0
; CHECK-NEXT: mov x19, x1
@@ -94,12 +97,12 @@ define void @dont_coalesce_arg_i32(i32 %arg, ptr %ptr) #0 {
; CHECK-NEXT: smstop sm
; CHECK-NEXT: bl use_i32
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: st1w { z0.s }, p0, [x19]
; CHECK-NEXT: addvl sp, sp, #1
-; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
@@ -115,11 +118,12 @@ define void @dont_coalesce_arg_i64(i64 %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: fmov d0, x0
; CHECK-NEXT: mov x19, x1
@@ -127,12 +131,12 @@ define void @dont_coalesce_arg_i64(i64 %arg, ptr %ptr) #0 {
; CHECK-NEXT: smstop sm
; CHECK-NEXT: bl use_i64
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: st1d { z0.d }, p0, [x19]
; CHECK-NEXT: addvl sp, sp, #1
-; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
@@ -148,11 +152,12 @@ define void @dont_coalesce_arg_f16(half %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
@@ -165,14 +170,14 @@ define void @dont_coalesce_arg_f16(half %arg, ptr %ptr) #0 {
; CHECK-NEXT: ldr h0, [sp, #14] // 2-byte Folded Reload
; CHECK-NEXT: bl use_f16
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
; CHECK-NEXT: st1h { z0.h }, p0, [x19]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
@@ -188,11 +193,12 @@ define void @dont_coalesce_arg_f32(float %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
@@ -205,14 +211,14 @@ define void @dont_coalesce_arg_f32(float %arg, ptr %ptr) #0 {
; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
; CHECK-NEXT: bl use_f32
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
; CHECK-NEXT: st1w { z0.s }, p0, [x19]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
@@ -228,11 +234,12 @@ define void @dont_coalesce_arg_f64(double %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
@@ -245,14 +252,14 @@ define void @dont_coalesce_arg_f64(double %arg, ptr %ptr) #0 {
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: bl use_f64
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
; CHECK-NEXT: st1d { z0.d }, p0, [x19]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
@@ -273,11 +280,12 @@ define void @dont_coalesce_arg_v1i8(<1 x i8> %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_v1i8:
; CHECK: // %bb.0:
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
@@ -290,14 +298,14 @@ define void @dont_coalesce_arg_v1i8(<1 x i8> %arg, ptr %ptr) #0 {
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: bl use_v16i8
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
; CHECK-NEXT: st1b { z0.b }, p0, [x19]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
@@ -314,11 +322,12 @@ define void @dont_coalesce_arg_v1i16(<1 x i16> %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_v1i16:
; CHECK: // %bb.0:
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
@@ -331,14 +340,14 @@ define void @dont_coalesce_arg_v1i16(<1 x i16> %arg, ptr %ptr) #0 {
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: bl use_v8i16
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
; CHECK-NEXT: st1h { z0.h }, p0, [x19]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
@@ -355,11 +364,12 @@ define void @dont_coalesce_arg_v1i32(<1 x i32> %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_v1i32:
; CHECK: // %bb.0:
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
@@ -372,14 +382,14 @@ define void @dont_coalesce_arg_v1i32(<1 x i32> %arg, ptr %ptr) #0 {
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: bl use_v4i32
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
; CHECK-NEXT: st1w { z0.s }, p0, [x19]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
@@ -396,11 +406,12 @@ define void @dont_coalesce_arg_v1i64(<1 x i64> %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_v1i64:
; CHECK: // %bb.0:
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
@@ -413,14 +424,14 @@ define void @dont_coalesce_arg_v1i64(<1 x i64> %arg, ptr %ptr) #0 {
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: bl use_v2i64
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
; CHECK-NEXT: st1d { z0.d }, p0, [x19]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
@@ -437,11 +448,12 @@ define void @dont_coalesce_arg_v1f16(<1 x half> %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_v1f16:
; CHECK: // %bb.0:
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
@@ -454,14 +466,14 @@ define void @dont_coalesce_arg_v1f16(<1 x half> %arg, ptr %ptr) #0 {
; CHECK-NEXT: ldr h0, [sp, #14] // 2-byte Folded Reload
; CHECK-NEXT: bl use_v8f16
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
; CHECK-NEXT: st1h { z0.h }, p0, [x19]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
@@ -478,11 +490,12 @@ define void @dont_coalesce_arg_v1f32(<1 x float> %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_v1f32:
; CHECK: // %bb.0:
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
@@ -495,14 +508,14 @@ define void @dont_coalesce_arg_v1f32(<1 x float> %arg, ptr %ptr) #0 {
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: bl use_v4f32
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
; CHECK-NEXT: st1w { z0.s }, p0, [x19]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
@@ -519,11 +532,12 @@ define void @dont_coalesce_arg_v1f64(<1 x double> %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
@@ -536,14 +550,14 @@ define void @dont_coalesce_arg_v1f64(<1 x double> %arg, ptr %ptr) #0 {
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: bl use_v2f64
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
; CHECK-NEXT: st1d { z0.d }, p0, [x19]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
@@ -564,11 +578,12 @@ define void @dont_coalesce_arg_v16i8(<16 x i8> %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
@@ -581,14 +596,14 @@ define void @dont_coalesce_arg_v16i8(<16 x i8> %arg, ptr %ptr) #0 {
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: bl use_v16i8
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
; CHECK-NEXT: st1b { z0.b }, p0, [x19]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
@@ -604,11 +619,12 @@ define void @dont_coalesce_arg_v8i16(<8 x i16> %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
@@ -621,14 +637,14 @@ define void @dont_coalesce_arg_v8i16(<8 x i16> %arg, ptr %ptr) #0 {
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: bl use_v8i16
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
; CHECK-NEXT: st1h { z0.h }, p0, [x19]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
@@ -644,11 +660,12 @@ define void @dont_coalesce_arg_v4i32(<4 x i32> %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
@@ -661,14 +678,14 @@ define void @dont_coalesce_arg_v4i32(<4 x i32> %arg, ptr %ptr) #0 {
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: bl use_v4i32
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
; CHECK-NEXT: st1w { z0.s }, p0, [x19]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
@@ -684,11 +701,12 @@ define void @dont_coalesce_arg_v2i64(<2 x i64> %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
@@ -701,14 +719,14 @@ define void @dont_coalesce_arg_v2i64(<2 x i64> %arg, ptr %ptr) #0 {
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: bl use_v2i64
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
; CHECK-NEXT: st1d { z0.d }, p0, [x19]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
@@ -724,11 +742,12 @@ define void @dont_coalesce_arg_v8f16(<8 x half> %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_v8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
@@ -741,14 +760,14 @@ define void @dont_coalesce_arg_v8f16(<8 x half> %arg, ptr %ptr) #0 {
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: bl use_v8f16
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
; CHECK-NEXT: st1h { z0.h }, p0, [x19]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
@@ -764,11 +783,12 @@ define void @dont_coalesce_arg_v8bf16(<8 x bfloat> %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_v8bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
@@ -781,14 +801,14 @@ define void @dont_coalesce_arg_v8bf16(<8 x bfloat> %arg, ptr %ptr) #0 {
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: bl use_v8bf16
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
; CHECK-NEXT: st1h { z0.h }, p0, [x19]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
@@ -804,11 +824,12 @@ define void @dont_coalesce_arg_v4f32(<4 x float> %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
@@ -821,14 +842,14 @@ define void @dont_coalesce_arg_v4f32(<4 x float> %arg, ptr %ptr) #0 {
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: bl use_v4f32
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
; CHECK-NEXT: st1d { z0.d }, p0, [x19]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
@@ -844,11 +865,12 @@ define void @dont_coalesce_arg_v2f64(<2 x double> %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_v2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
@@ -861,14 +883,14 @@ define void @dont_coalesce_arg_v2f64(<2 x double> %arg, ptr %ptr) #0 {
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: bl use_v2f64
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
; CHECK-NEXT: st1d { z0.d }, p0, [x19]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
@@ -887,11 +909,12 @@ define void @dont_coalesce_arg_v8i1(<8 x i1> %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_v8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
@@ -900,10 +923,10 @@ define void @dont_coalesce_arg_v8i1(<8 x i1> %arg, ptr %ptr) #0 {
; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
-; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: and z1.b, z1.b, #0x1
; CHECK-NEXT: cmpne p0.b, p0/z, z1.b, #0
; CHECK-NEXT: str p0, [x8, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: smstop sm
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: bl use_v8i1
@@ -913,8 +936,8 @@ define void @dont_coalesce_arg_v8i1(<8 x i1> %arg, ptr %ptr) #0 {
; CHECK-NEXT: str p0, [x19]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
@@ -933,23 +956,26 @@ define void @dont_coalesce_arg_v8i1(<8 x i1> %arg, ptr %ptr) #0 {
define void @dont_coalesce_res_i8(ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_res_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill
; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: smstop sm
; CHECK-NEXT: bl get_i8
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: fmov s0, w0
; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: fmov s0, w0
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: st1b { z0.b }, p0, [x19]
-; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT: ret
%res = call i8 @get_i8()
%vec = insertelement <vscale x 16 x i8> poison, i8 %res, i32 0
@@ -960,23 +986,26 @@ define void @dont_coalesce_res_i8(ptr %ptr) #0 {
define void @dont_coalesce_res_i16(ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_res_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill
; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: smstop sm
; CHECK-NEXT: bl get_i16
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: fmov s0, w0
; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: fmov s0, w0
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: st1h { z0.h }, p0, [x19]
-; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT: ret
%res = call i16 @get_i16()
%vec = insertelement <vscale x 8 x i16> poison, i16 %res, i32 0
@@ -987,23 +1016,26 @@ define void @dont_coalesce_res_i16(ptr %ptr) #0 {
define void @dont_coalesce_res_i32(ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_res_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill
; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: smstop sm
; CHECK-NEXT: bl get_i32
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: fmov s0, w0
; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fmov s0, w0
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: st1w { z0.s }, p0, [x19]
-; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT: ret
%res = call i32 @get_i32()
%vec = insertelement <vscale x 4 x i32> poison, i32 %res, i32 0
@@ -1014,23 +1046,26 @@ define void @dont_coalesce_res_i32(ptr %ptr) #0 {
define void @dont_coalesce_res_i64(ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_res_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill
; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: smstop sm
; CHECK-NEXT: bl get_i64
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: fmov d0, x0
; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fmov d0, x0
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: st1d { z0.d }, p0, [x19]
-; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT: ret
%res = call i64 @get_i64()
%vec = insertelement <vscale x 2 x i64> poison, i64 %res, i32 0
@@ -1041,27 +1076,30 @@ define void @dont_coalesce_res_i64(ptr %ptr) #0 {
define void @dont_coalesce_res_f16(ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_res_f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #96
+; CHECK-NEXT: sub sp, sp, #112
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill
; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: smstop sm
; CHECK-NEXT: bl get_f16
; CHECK-NEXT: str h0, [sp, #14] // 2-byte Folded Spill
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ldr h0, [sp, #14] // 2-byte Folded Reload
+; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: st1h { z0.h }, p0, [x19]
-; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #96
+; CHECK-NEXT: add sp, sp, #112
; CHECK-NEXT: ret
%res = call half @get_f16()
%vec = insertelement <vscale x 8 x half> poison, half %res, i32 0
@@ -1072,12 +1110,14 @@ define void @dont_coalesce_res_f16(ptr %ptr) #0 {
define void @dont_coalesce_res_f32(ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_res_f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #96
+; CHECK-NEXT: sub sp, sp, #112
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill
; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: smstop sm
; CHECK-NEXT: bl get_f32
@@ -1087,11 +1127,12 @@ define void @dont_coalesce_res_f32(ptr %ptr) #0 {
; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: st1w { z0.s }, p0, [x19]
-; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #96
+; CHECK-NEXT: add sp, sp, #112
; CHECK-NEXT: ret
%res = call float @get_f32()
%vec = insertelement <vscale x 4 x float> poison, float %res, i32 0
@@ -1102,12 +1143,14 @@ define void @dont_coalesce_res_f32(ptr %ptr) #0 {
define void @dont_coalesce_res_f64(ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_res_f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #96
+; CHECK-NEXT: sub sp, sp, #112
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill
; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: smstop sm
; CHECK-NEXT: bl get_f64
@@ -1117,11 +1160,12 @@ define void @dont_coalesce_res_f64(ptr %ptr) #0 {
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: st1d { z0.d }, p0, [x19]
-; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #96
+; CHECK-NEXT: add sp, sp, #112
; CHECK-NEXT: ret
%res = call double @get_f64()
%vec = insertelement <vscale x 2 x double> poison, double %res, i32 0
@@ -1136,12 +1180,14 @@ define void @dont_coalesce_res_f64(ptr %ptr) #0 {
define void @dont_coalesce_res_v1i8(ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_res_v1i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #96
+; CHECK-NEXT: sub sp, sp, #112
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill
; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: smstop sm
; CHECK-NEXT: bl get_v1i8
@@ -1151,11 +1197,12 @@ define void @dont_coalesce_res_v1i8(ptr %ptr) #0 {
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: st1b { z0.b }, p0, [x19]
-; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #96
+; CHECK-NEXT: add sp, sp, #112
; CHECK-NEXT: ret
%res = call <1 x i8> @get_v1i8()
%elt = extractelement <1 x i8> %res, i32 0
@@ -1167,12 +1214,14 @@ define void @dont_coalesce_res_v1i8(ptr %ptr) #0 {
define void @dont_coalesce_res_v1i16(ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_res_v1i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #96
+; CHECK-NEXT: sub sp, sp, #112
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill
; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: smstop sm
; CHECK-NEXT: bl get_v1i16
@@ -1182,11 +1231,12 @@ define void @dont_coalesce_res_v1i16(ptr %ptr) #0 {
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: st1h { z0.h }, p0, [x19]
-; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #96
+; CHECK-NEXT: add sp, sp, #112
; CHECK-NEXT: ret
%res = call <1 x i16> @get_v1i16()
%elt = extractelement <1 x i16> %res, i32 0
@@ -1198,12 +1248,14 @@ define void @dont_coalesce_res_v1i16(ptr %ptr) #0 {
define void @dont_coalesce_res_v1i32(ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_res_v1i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #96
+; CHECK-NEXT: sub sp, sp, #112
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill
; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: smstop sm
; CHECK-NEXT: bl get_v1i32
@@ -1213,11 +1265,12 @@ define void @dont_coalesce_res_v1i32(ptr %ptr) #0 {
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: st1w { z0.s }, p0, [x19]
-; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #96
+; CHECK-NEXT: add sp, sp, #112
; CHECK-NEXT: ret
%res = call <1 x i32> @get_v1i32()
%elt = extractelement <1 x i32> %res, i32 0
@@ -1229,12 +1282,14 @@ define void @dont_coalesce_res_v1i32(ptr %ptr) #0 {
define void @dont_coalesce_res_v1i64(ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_res_v1i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #96
+; CHECK-NEXT: sub sp, sp, #112
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill
; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: smstop sm
; CHECK-NEXT: bl get_v1i64
@@ -1244,11 +1299,12 @@ define void @dont_coalesce_res_v1i64(ptr %ptr) #0 {
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: st1d { z0.d }, p0, [x19]
-; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #96
+; CHECK-NEXT: add sp, sp, #112
; CHECK-NEXT: ret
%res = call <1 x i64> @get_v1i64()
%elt = extractelement <1 x i64> %res, i32 0
@@ -1260,27 +1316,30 @@ define void @dont_coalesce_res_v1i64(ptr %ptr) #0 {
define void @dont_coalesce_res_v1f16(ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_res_v1f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #96
+; CHECK-NEXT: sub sp, sp, #112
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill
; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: smstop sm
; CHECK-NEXT: bl get_v1f16
; CHECK-NEXT: str h0, [sp, #14] // 2-byte Folded Spill
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ldr h0, [sp, #14] // 2-byte Folded Reload
+; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: st1h { z0.h }, p0, [x19]
-; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #96
+; CHECK-NEXT: add sp, sp, #112
; CHECK-NEXT: ret
%res = call <1 x half> @get_v1f16()
%elt = extractelement <1 x half> %res, i32 0
@@ -1292,12 +1351,14 @@ define void @dont_coalesce_res_v1f16(ptr %ptr) #0 {
define void @dont_coalesce_res_v1f32(ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_res_v1f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #96
+; CHECK-NEXT: sub sp, sp, #112
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill
; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: smstop sm
; CHECK-NEXT: bl get_v1f32
@@ -1307,11 +1368,12 @@ define void @dont_coalesce_res_v1f32(ptr %ptr) #0 {
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: st1w { z0.s }, p0, [x19]
-; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #96
+; CHECK-NEXT: add sp, sp, #112
; CHECK-NEXT: ret
%res = call <1 x float> @get_v1f32()
%elt = extractelement <1 x float> %res, i32 0
@@ -1323,12 +1385,14 @@ define void @dont_coalesce_res_v1f32(ptr %ptr) #0 {
define void @dont_coalesce_res_v1f64(ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_res_v1f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #96
+; CHECK-NEXT: sub sp, sp, #112
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill
; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: smstop sm
; CHECK-NEXT: bl get_v1f64
@@ -1338,11 +1402,12 @@ define void @dont_coalesce_res_v1f64(ptr %ptr) #0 {
; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: st1d { z0.d }, p0, [x19]
-; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #96
+; CHECK-NEXT: add sp, sp, #112
; CHECK-NEXT: ret
%res = call <1 x double> @get_v1f64()
%elt = extractelement <1 x double> %res, i32 0
@@ -1358,27 +1423,30 @@ define void @dont_coalesce_res_v1f64(ptr %ptr) #0 {
define void @dont_coalesce_res_v16i8(ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_res_v16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #96
+; CHECK-NEXT: sub sp, sp, #112
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill
; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: smstop sm
; CHECK-NEXT: bl get_v16i8
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: st1b { z0.b }, p0, [x19]
-; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #96
+; CHECK-NEXT: add sp, sp, #112
; CHECK-NEXT: ret
%res = call <16 x i8> @get_v16i8()
%vec = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> poison, <16 x i8> %res, i64 0)
@@ -1389,27 +1457,30 @@ define void @dont_coalesce_res_v16i8(ptr %ptr) #0 {
define void @dont_coalesce_res_v8i16(ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_res_v8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #96
+; CHECK-NEXT: sub sp, sp, #112
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill
; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: smstop sm
; CHECK-NEXT: bl get_v8i16
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: st1h { z0.h }, p0, [x19]
-; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #96
+; CHECK-NEXT: add sp, sp, #112
; CHECK-NEXT: ret
%res = call <8 x i16> @get_v8i16()
%vec = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> poison, <8 x i16> %res, i64 0)
@@ -1420,27 +1491,30 @@ define void @dont_coalesce_res_v8i16(ptr %ptr) #0 {
define void @dont_coalesce_res_v4i32(ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_res_v4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #96
+; CHECK-NEXT: sub sp, sp, #112
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill
; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: smstop sm
; CHECK-NEXT: bl get_v4i32
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: st1w { z0.s }, p0, [x19]
-; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #96
+; CHECK-NEXT: add sp, sp, #112
; CHECK-NEXT: ret
%res = call <4 x i32> @get_v4i32()
%vec = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> %res, i64 0)
@@ -1451,27 +1525,30 @@ define void @dont_coalesce_res_v4i32(ptr %ptr) #0 {
define void @dont_coalesce_res_v2i64(ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_res_v2i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #96
+; CHECK-NEXT: sub sp, sp, #112
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill
; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: smstop sm
; CHECK-NEXT: bl get_v2i64
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: st1d { z0.d }, p0, [x19]
-; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #96
+; CHECK-NEXT: add sp, sp, #112
; CHECK-NEXT: ret
%res = call <2 x i64> @get_v2i64()
%vec = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> %res, i64 0)
@@ -1482,27 +1559,30 @@ define void @dont_coalesce_res_v2i64(ptr %ptr) #0 {
define void @dont_coalesce_res_v8f16(ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_res_v8f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #96
+; CHECK-NEXT: sub sp, sp, #112
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill
; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: smstop sm
; CHECK-NEXT: bl get_v8f16
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: st1h { z0.h }, p0, [x19]
-; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #96
+; CHECK-NEXT: add sp, sp, #112
; CHECK-NEXT: ret
%res = call <8 x half> @get_v8f16()
%vec = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v8f16(<vscale x 8 x half> poison, <8 x half> %res, i64 0)
@@ -1513,27 +1593,30 @@ define void @dont_coalesce_res_v8f16(ptr %ptr) #0 {
define void @dont_coalesce_res_v4f32(ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_res_v4f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #96
+; CHECK-NEXT: sub sp, sp, #112
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill
; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: smstop sm
; CHECK-NEXT: bl get_v4f32
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: st1w { z0.s }, p0, [x19]
-; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #96
+; CHECK-NEXT: add sp, sp, #112
; CHECK-NEXT: ret
%res = call <4 x float> @get_v4f32()
%vec = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> poison, <4 x float> %res, i64 0)
@@ -1544,27 +1627,30 @@ define void @dont_coalesce_res_v4f32(ptr %ptr) #0 {
define void @dont_coalesce_res_v2f64(ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_res_v2f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #96
+; CHECK-NEXT: sub sp, sp, #112
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: str x19, [sp, #96] // 8-byte Folded Spill
; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: smstop sm
; CHECK-NEXT: bl get_v2f64
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: smstart sm
-; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: st1d { z0.d }, p0, [x19]
-; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #96
+; CHECK-NEXT: add sp, sp, #112
; CHECK-NEXT: ret
%res = call <2 x double> @get_v2f64()
%vec = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> poison, <2 x double> %res, i64 0)