diff options
author | Benjamin Maxwell <benjamin.maxwell@arm.com> | 2025-05-08 17:37:45 +0000 |
---|---|---|
committer | Benjamin Maxwell <benjamin.maxwell@arm.com> | 2025-09-18 14:40:31 +0000 |
commit | f7d7b98bcc360c192fe277ec7cf0c94c2337b3d1 (patch) | |
tree | 2e0ca8f99696cca888c90c70101bdb48ce1b5927 /llvm/test/CodeGen/AArch64/stack-hazard.ll | |
parent | 0dfb0725e2a4f82af47821946bfbbfcd7ed08e10 (diff) | |
download | llvm-users/MacDue/split_pprs.zip llvm-users/MacDue/split_pprs.tar.gz llvm-users/MacDue/split_pprs.tar.bz2 |
[AArch64][SME] Support split ZPR and PPR area allocation (branch: users/MacDue/split_pprs)
For a while we have supported the `-aarch64-stack-hazard-size=<size>`
option, which adds "hazard padding" between GPRs and FPR/ZPRs. However,
there is currently a hole in this mitigation as PPR and FPR/ZPR accesses
to the same area also cause streaming memory hazards (this is noted by
`-pass-remarks-analysis=sme -aarch64-stack-hazard-remark-size=<val>`),
and the current stack layout places PPRs and ZPRs within the same area,
which looks like:
------------------------------------ Higher address
| callee-saved gpr registers |
|---------------------------------- |
| lr,fp (a.k.a. "frame record") |
|-----------------------------------| <- fp(=x29)
| <hazard padding> |
|-----------------------------------|
| callee-saved fp/simd/SVE regs |
|-----------------------------------|
| SVE stack objects |
|-----------------------------------|
| local variables of fixed size |
| <FPR> |
| <hazard padding> |
| <GPR> |
------------------------------------| <- sp
| Lower address
With this patch the stack (and hazard padding) is rearranged so that
hazard padding is placed between the PPRs and ZPRs rather than within
the (fixed-size) callee-save region. The new layout looks something like this:
------------------------------------ Higher address
| callee-saved gpr registers |
|---------------------------------- |
| lr,fp (a.k.a. "frame record") |
|-----------------------------------| <- fp(=x29)
| callee-saved PPRs |
| PPR stack objects | (These are SVE predicates)
|-----------------------------------|
| <hazard padding> |
|-----------------------------------|
| callee-saved ZPR regs | (These are SVE vectors)
| ZPR stack objects | Note: FPRs are promoted to ZPRs
|-----------------------------------|
| local variables of fixed size |
| <FPR> |
| <hazard padding> |
| <GPR> |
------------------------------------| <- sp
| Lower address
This layout is only enabled if:
* SplitSVEObjects are enabled (`-aarch64-split-sve-objects`)
- (This may be enabled by default in a later patch)
* Streaming memory hazards are present
- (`-aarch64-stack-hazard-size=<val>` != 0)
* PPRs and FPRs/ZPRs are on the stack
* There's no stack realignment or variable-sized objects
- This is left as a TODO for now
Additionally, any FPR callee-saves that are present will be promoted to
ZPRs. This is to prevent stack hazards between FPRs and GPRs in the
fixed size callee-save area (which would otherwise require more hazard
padding, or moving the FPR callee-saves).
This layout should resolve the hole in the hazard padding mitigation,
and is not intended to change codegen for non-SME code.
Change-Id: I2e1906577c2ac79c40bc69e7c15e3ef09857445f
Diffstat (limited to 'llvm/test/CodeGen/AArch64/stack-hazard.ll')
-rw-r--r-- | llvm/test/CodeGen/AArch64/stack-hazard.ll | 876 |
1 files changed, 544 insertions, 332 deletions
diff --git a/llvm/test/CodeGen/AArch64/stack-hazard.ll b/llvm/test/CodeGen/AArch64/stack-hazard.ll index 5f52280..333a8be 100644 --- a/llvm/test/CodeGen/AArch64/stack-hazard.ll +++ b/llvm/test/CodeGen/AArch64/stack-hazard.ll @@ -1,7 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -aarch64-stack-hazard-size=0 | FileCheck %s --check-prefixes=CHECK,CHECK0 ; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -aarch64-stack-hazard-size=64 | FileCheck %s --check-prefixes=CHECK,CHECK64 -; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -aarch64-stack-hazard-size=1024 | FileCheck %s --check-prefixes=CHECK,CHECK1024 +; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -aarch64-stack-hazard-size=1024 | FileCheck %s --check-prefixes=CHECK,CHECK1024,CHECK1024-NOSPLITSVE +; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -aarch64-split-sve-objects -aarch64-stack-hazard-size=1024 | FileCheck %s --check-prefixes=CHECK,CHECK1024,CHECK1024-SPLITSVE define i32 @basic(i32 noundef %num) { ; CHECK-LABEL: basic: @@ -1503,72 +1504,24 @@ define [2 x <vscale x 4 x i1>] @sve_signature_pred_2xv4i1([2 x <vscale x 4 x i1> } define [2 x <vscale x 4 x i1>] @sve_signature_pred_2xv4i1_caller([2 x <vscale x 4 x i1>] %arg1, [2 x <vscale x 4 x i1>] %arg2) nounwind "aarch64_pstate_sm_compatible" { -; CHECK0-LABEL: sve_signature_pred_2xv4i1_caller: -; CHECK0: // %bb.0: -; CHECK0-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill -; CHECK0-NEXT: addvl sp, sp, #-1 -; CHECK0-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK0-NEXT: mov p5.b, p0.b -; CHECK0-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK0-NEXT: mov p4.b, p1.b -; CHECK0-NEXT: mov p0.b, p2.b -; CHECK0-NEXT: mov p1.b, p3.b -; CHECK0-NEXT: mov p2.b, p5.b -; CHECK0-NEXT: mov p3.b, p4.b -; CHECK0-NEXT: bl sve_signature_pred_2xv4i1 -; CHECK0-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK0-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK0-NEXT: addvl sp, sp, #1 -; CHECK0-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload -; CHECK0-NEXT: ret -; -; CHECK64-LABEL: sve_signature_pred_2xv4i1_caller: -; CHECK64: // %bb.0: -; CHECK64-NEXT: sub sp, sp, #80 -; CHECK64-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK64-NEXT: addvl sp, sp, #-1 -; CHECK64-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK64-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK64-NEXT: sub sp, sp, #64 -; CHECK64-NEXT: mov p4.b, p1.b -; CHECK64-NEXT: mov p5.b, p0.b -; CHECK64-NEXT: mov p0.b, p2.b -; CHECK64-NEXT: mov p1.b, p3.b -; CHECK64-NEXT: mov p2.b, p5.b -; CHECK64-NEXT: mov p3.b, p4.b -; CHECK64-NEXT: bl sve_signature_pred_2xv4i1 -; CHECK64-NEXT: add sp, sp, #64 -; CHECK64-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK64-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK64-NEXT: addvl sp, sp, #1 -; CHECK64-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload -; CHECK64-NEXT: add sp, sp, #80 -; CHECK64-NEXT: ret -; -; CHECK1024-LABEL: sve_signature_pred_2xv4i1_caller: -; CHECK1024: // %bb.0: -; CHECK1024-NEXT: sub sp, sp, #1040 -; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill -; CHECK1024-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill -; CHECK1024-NEXT: addvl sp, sp, #-1 -; CHECK1024-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK1024-NEXT: str p4, [sp, #7, mul vl] // 2-byte 
Folded Spill -; CHECK1024-NEXT: sub sp, sp, #1024 -; CHECK1024-NEXT: mov p4.b, p1.b -; CHECK1024-NEXT: mov p5.b, p0.b -; CHECK1024-NEXT: mov p0.b, p2.b -; CHECK1024-NEXT: mov p1.b, p3.b -; CHECK1024-NEXT: mov p2.b, p5.b -; CHECK1024-NEXT: mov p3.b, p4.b -; CHECK1024-NEXT: bl sve_signature_pred_2xv4i1 -; CHECK1024-NEXT: add sp, sp, #1024 -; CHECK1024-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK1024-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK1024-NEXT: addvl sp, sp, #1 -; CHECK1024-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload -; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload -; CHECK1024-NEXT: add sp, sp, #1040 -; CHECK1024-NEXT: ret +; CHECK-LABEL: sve_signature_pred_2xv4i1_caller: +; CHECK: // %bb.0: +; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p5.b, p0.b +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p4.b, p1.b +; CHECK-NEXT: mov p0.b, p2.b +; CHECK-NEXT: mov p1.b, p3.b +; CHECK-NEXT: mov p2.b, p5.b +; CHECK-NEXT: mov p3.b, p4.b +; CHECK-NEXT: bl sve_signature_pred_2xv4i1 +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ret %res = call [2 x <vscale x 4 x i1>] @sve_signature_pred_2xv4i1([2 x <vscale x 4 x i1>] %arg2, [2 x <vscale x 4 x i1>] %arg1) ret [2 x <vscale x 4 x i1>] %res } @@ -2113,139 +2066,269 @@ define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, <vscale x 16 x i8> %P3, ; CHECK64-NEXT: .cfi_restore w29 ; CHECK64-NEXT: ret ; -; CHECK1024-LABEL: svecc_call: -; CHECK1024: // %bb.0: // %entry -; CHECK1024-NEXT: sub sp, sp, #1088 -; CHECK1024-NEXT: .cfi_def_cfa_offset 1088 -; CHECK1024-NEXT: cntd x9 -; CHECK1024-NEXT: str x29, [sp, #1024] // 
8-byte Folded Spill -; CHECK1024-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill -; CHECK1024-NEXT: str x9, [sp, #1040] // 8-byte Folded Spill -; CHECK1024-NEXT: str x28, [sp, #1048] // 8-byte Folded Spill -; CHECK1024-NEXT: str x27, [sp, #1056] // 8-byte Folded Spill -; CHECK1024-NEXT: str x26, [sp, #1064] // 8-byte Folded Spill -; CHECK1024-NEXT: str x19, [sp, #1072] // 8-byte Folded Spill -; CHECK1024-NEXT: add x29, sp, #1024 -; CHECK1024-NEXT: .cfi_def_cfa w29, 64 -; CHECK1024-NEXT: .cfi_offset w19, -16 -; CHECK1024-NEXT: .cfi_offset w26, -24 -; CHECK1024-NEXT: .cfi_offset w27, -32 -; CHECK1024-NEXT: .cfi_offset w28, -40 -; CHECK1024-NEXT: .cfi_offset vg, -48 -; CHECK1024-NEXT: .cfi_offset w30, -56 -; CHECK1024-NEXT: .cfi_offset w29, -64 -; CHECK1024-NEXT: addvl sp, sp, #-18 -; CHECK1024-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK1024-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK1024-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK1024-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK1024-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; CHECK1024-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; CHECK1024-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; CHECK1024-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill -; CHECK1024-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; CHECK1024-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; CHECK1024-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; CHECK1024-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill -; CHECK1024-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z18, [sp, #7, mul vl] 
// 16-byte Folded Spill -; CHECK1024-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: .cfi_escape 0x10, 0x48, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x78, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d8 @ cfa - 8 * IncomingVG - 1088 -; CHECK1024-NEXT: .cfi_escape 0x10, 0x49, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x70, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d9 @ cfa - 16 * IncomingVG - 1088 -; CHECK1024-NEXT: .cfi_escape 0x10, 0x4a, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x68, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d10 @ cfa - 24 * IncomingVG - 1088 -; CHECK1024-NEXT: .cfi_escape 0x10, 0x4b, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x60, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d11 @ cfa - 32 * IncomingVG - 1088 -; CHECK1024-NEXT: .cfi_escape 0x10, 0x4c, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x58, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d12 @ cfa - 40 * IncomingVG - 1088 -; CHECK1024-NEXT: .cfi_escape 0x10, 0x4d, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x50, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d13 @ cfa - 48 * IncomingVG - 1088 -; CHECK1024-NEXT: .cfi_escape 0x10, 0x4e, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x48, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d14 @ cfa - 56 * IncomingVG - 1088 -; CHECK1024-NEXT: .cfi_escape 0x10, 0x4f, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x40, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 
// $d15 @ cfa - 64 * IncomingVG - 1088 -; CHECK1024-NEXT: sub sp, sp, #1024 -; CHECK1024-NEXT: mov x8, x0 -; CHECK1024-NEXT: bl __arm_sme_state -; CHECK1024-NEXT: mov x19, x0 -; CHECK1024-NEXT: //APP -; CHECK1024-NEXT: //NO_APP -; CHECK1024-NEXT: tbz w19, #0, .LBB28_2 -; CHECK1024-NEXT: // %bb.1: // %entry -; CHECK1024-NEXT: smstop sm -; CHECK1024-NEXT: .LBB28_2: // %entry -; CHECK1024-NEXT: mov x0, x8 -; CHECK1024-NEXT: mov w1, #45 // =0x2d -; CHECK1024-NEXT: mov w2, #37 // =0x25 -; CHECK1024-NEXT: bl memset -; CHECK1024-NEXT: tbz w19, #0, .LBB28_4 -; CHECK1024-NEXT: // %bb.3: // %entry -; CHECK1024-NEXT: smstart sm -; CHECK1024-NEXT: .LBB28_4: // %entry -; CHECK1024-NEXT: mov w0, #22647 // =0x5877 -; CHECK1024-NEXT: movk w0, #59491, lsl #16 -; CHECK1024-NEXT: add sp, sp, #1024 -; CHECK1024-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; 
CHECK1024-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK1024-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK1024-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK1024-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK1024-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK1024-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK1024-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK1024-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK1024-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK1024-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK1024-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload -; CHECK1024-NEXT: addvl sp, sp, #18 -; CHECK1024-NEXT: .cfi_restore z8 -; CHECK1024-NEXT: .cfi_restore z9 -; CHECK1024-NEXT: .cfi_restore z10 -; CHECK1024-NEXT: .cfi_restore z11 -; CHECK1024-NEXT: .cfi_restore z12 -; CHECK1024-NEXT: .cfi_restore z13 -; CHECK1024-NEXT: .cfi_restore z14 -; CHECK1024-NEXT: .cfi_restore z15 -; CHECK1024-NEXT: .cfi_def_cfa wsp, 1088 -; CHECK1024-NEXT: ldr x19, [sp, #1072] // 8-byte Folded Reload -; CHECK1024-NEXT: ldr x26, [sp, #1064] // 8-byte Folded Reload -; CHECK1024-NEXT: ldr x27, [sp, #1056] // 8-byte Folded Reload -; CHECK1024-NEXT: ldr x28, [sp, #1048] // 8-byte Folded Reload -; CHECK1024-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload -; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload -; CHECK1024-NEXT: add sp, sp, #1088 -; CHECK1024-NEXT: .cfi_def_cfa_offset 0 -; CHECK1024-NEXT: .cfi_restore w19 -; CHECK1024-NEXT: .cfi_restore w26 -; CHECK1024-NEXT: .cfi_restore w27 -; CHECK1024-NEXT: .cfi_restore w28 -; CHECK1024-NEXT: .cfi_restore vg -; CHECK1024-NEXT: .cfi_restore w30 -; CHECK1024-NEXT: .cfi_restore w29 -; CHECK1024-NEXT: ret +; CHECK1024-NOSPLITSVE-LABEL: svecc_call: +; CHECK1024-NOSPLITSVE: // %bb.0: // %entry +; CHECK1024-NOSPLITSVE-NEXT: sub sp, sp, #1088 +; 
CHECK1024-NOSPLITSVE-NEXT: .cfi_def_cfa_offset 1088 +; CHECK1024-NOSPLITSVE-NEXT: cntd x9 +; CHECK1024-NOSPLITSVE-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str x9, [sp, #1040] // 8-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str x28, [sp, #1048] // 8-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str x27, [sp, #1056] // 8-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str x26, [sp, #1064] // 8-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str x19, [sp, #1072] // 8-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: add x29, sp, #1024 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_def_cfa w29, 64 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w19, -16 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w26, -24 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w27, -32 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w28, -40 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset vg, -48 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w30, -56 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w29, -64 +; CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #-18 +; CHECK1024-NOSPLITSVE-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; 
CHECK1024-NOSPLITSVE-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x48, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x78, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d8 @ cfa - 8 * IncomingVG - 1088 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x49, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x70, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d9 @ cfa - 16 * IncomingVG - 1088 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x4a, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x68, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d10 @ cfa - 24 * IncomingVG - 1088 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x4b, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x60, 0x1e, 
0x22, 0x11, 0xc0, 0x77, 0x22 // $d11 @ cfa - 32 * IncomingVG - 1088 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x4c, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x58, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d12 @ cfa - 40 * IncomingVG - 1088 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x4d, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x50, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d13 @ cfa - 48 * IncomingVG - 1088 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x4e, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x48, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d14 @ cfa - 56 * IncomingVG - 1088 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x4f, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x40, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d15 @ cfa - 64 * IncomingVG - 1088 +; CHECK1024-NOSPLITSVE-NEXT: sub sp, sp, #1024 +; CHECK1024-NOSPLITSVE-NEXT: mov x8, x0 +; CHECK1024-NOSPLITSVE-NEXT: bl __arm_sme_state +; CHECK1024-NOSPLITSVE-NEXT: mov x19, x0 +; CHECK1024-NOSPLITSVE-NEXT: //APP +; CHECK1024-NOSPLITSVE-NEXT: //NO_APP +; CHECK1024-NOSPLITSVE-NEXT: tbz w19, #0, .LBB28_2 +; CHECK1024-NOSPLITSVE-NEXT: // %bb.1: // %entry +; CHECK1024-NOSPLITSVE-NEXT: smstop sm +; CHECK1024-NOSPLITSVE-NEXT: .LBB28_2: // %entry +; CHECK1024-NOSPLITSVE-NEXT: mov x0, x8 +; CHECK1024-NOSPLITSVE-NEXT: mov w1, #45 // =0x2d +; CHECK1024-NOSPLITSVE-NEXT: mov w2, #37 // =0x25 +; CHECK1024-NOSPLITSVE-NEXT: bl memset +; CHECK1024-NOSPLITSVE-NEXT: tbz w19, #0, .LBB28_4 +; CHECK1024-NOSPLITSVE-NEXT: // %bb.3: // %entry +; CHECK1024-NOSPLITSVE-NEXT: smstart sm +; CHECK1024-NOSPLITSVE-NEXT: .LBB28_4: // %entry +; CHECK1024-NOSPLITSVE-NEXT: mov w0, #22647 // =0x5877 +; CHECK1024-NOSPLITSVE-NEXT: movk w0, #59491, lsl #16 +; CHECK1024-NOSPLITSVE-NEXT: add sp, sp, #1024 +; CHECK1024-NOSPLITSVE-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; 
CHECK1024-NOSPLITSVE-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p4, [sp, #15, mul vl] // 
2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #18 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z8 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z9 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z10 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z11 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z12 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z13 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z14 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z15 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_def_cfa wsp, 1088 +; CHECK1024-NOSPLITSVE-NEXT: ldr x19, [sp, #1072] // 8-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr x26, [sp, #1064] // 8-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr x27, [sp, #1056] // 8-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr x28, [sp, #1048] // 8-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: add sp, sp, #1088 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_def_cfa_offset 0 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore w19 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore w26 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore w27 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore w28 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore vg +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore w30 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore w29 +; CHECK1024-NOSPLITSVE-NEXT: ret +; +; CHECK1024-SPLITSVE-LABEL: svecc_call: +; CHECK1024-SPLITSVE: // %bb.0: // %entry +; CHECK1024-SPLITSVE-NEXT: stp x29, x30, [sp, #-64]! 
// 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: .cfi_def_cfa_offset 64 +; CHECK1024-SPLITSVE-NEXT: cntd x9 +; CHECK1024-SPLITSVE-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str x9, [sp, #16] // 8-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: stp x26, x19, [sp, #48] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: mov x29, sp +; CHECK1024-SPLITSVE-NEXT: .cfi_def_cfa w29, 64 +; CHECK1024-SPLITSVE-NEXT: .cfi_offset w19, -8 +; CHECK1024-SPLITSVE-NEXT: .cfi_offset w26, -16 +; CHECK1024-SPLITSVE-NEXT: .cfi_offset w27, -24 +; CHECK1024-SPLITSVE-NEXT: .cfi_offset w28, -32 +; CHECK1024-SPLITSVE-NEXT: .cfi_offset vg, -48 +; CHECK1024-SPLITSVE-NEXT: .cfi_offset w30, -56 +; CHECK1024-SPLITSVE-NEXT: .cfi_offset w29, -64 +; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #-2 +; CHECK1024-SPLITSVE-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: sub sp, sp, #1024 +; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #-16 +; CHECK1024-SPLITSVE-NEXT: str z23, [sp] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z22, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: 
str z21, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z20, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z19, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z18, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z17, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z16, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z15, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z14, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z13, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z12, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z11, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z10, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z9, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z8, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x48, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x68, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d8 @ cfa - 24 * IncomingVG - 1088 +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x49, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x60, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d9 @ cfa - 32 * IncomingVG - 1088 +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x4a, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x58, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d10 @ cfa - 40 * IncomingVG - 1088 +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x4b, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x50, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d11 @ cfa - 48 * IncomingVG - 1088 +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x4c, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x48, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d12 @ cfa - 56 * IncomingVG - 1088 +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x4d, 0x0d, 0x12, 
0x11, 0x50, 0x22, 0x06, 0x11, 0x40, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d13 @ cfa - 64 * IncomingVG - 1088 +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x4e, 0x0e, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0xb8, 0x7f, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d14 @ cfa - 72 * IncomingVG - 1088 +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x4f, 0x0e, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0xb0, 0x7f, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d15 @ cfa - 80 * IncomingVG - 1088 +; CHECK1024-SPLITSVE-NEXT: sub sp, sp, #1024 +; CHECK1024-SPLITSVE-NEXT: mov x8, x0 +; CHECK1024-SPLITSVE-NEXT: bl __arm_sme_state +; CHECK1024-SPLITSVE-NEXT: mov x19, x0 +; CHECK1024-SPLITSVE-NEXT: //APP +; CHECK1024-SPLITSVE-NEXT: //NO_APP +; CHECK1024-SPLITSVE-NEXT: tbz w19, #0, .LBB28_2 +; CHECK1024-SPLITSVE-NEXT: // %bb.1: // %entry +; CHECK1024-SPLITSVE-NEXT: smstop sm +; CHECK1024-SPLITSVE-NEXT: .LBB28_2: // %entry +; CHECK1024-SPLITSVE-NEXT: mov x0, x8 +; CHECK1024-SPLITSVE-NEXT: mov w1, #45 // =0x2d +; CHECK1024-SPLITSVE-NEXT: mov w2, #37 // =0x25 +; CHECK1024-SPLITSVE-NEXT: bl memset +; CHECK1024-SPLITSVE-NEXT: tbz w19, #0, .LBB28_4 +; CHECK1024-SPLITSVE-NEXT: // %bb.3: // %entry +; CHECK1024-SPLITSVE-NEXT: smstart sm +; CHECK1024-SPLITSVE-NEXT: .LBB28_4: // %entry +; CHECK1024-SPLITSVE-NEXT: mov w0, #22647 // =0x5877 +; CHECK1024-SPLITSVE-NEXT: movk w0, #59491, lsl #16 +; CHECK1024-SPLITSVE-NEXT: add sp, sp, #1024 +; CHECK1024-SPLITSVE-NEXT: ldr z23, [sp] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z22, [sp, #1, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z21, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z20, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z19, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z18, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z17, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z16, [sp, #7, 
mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z15, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z14, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z13, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z12, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z8, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: add sp, sp, #1024 +; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #16 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore z8 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore z9 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore z10 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore z11 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore z12 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore z13 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore z14 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore z15 +; CHECK1024-SPLITSVE-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; 
CHECK1024-SPLITSVE-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #2 +; CHECK1024-SPLITSVE-NEXT: .cfi_def_cfa wsp, 64 +; CHECK1024-SPLITSVE-NEXT: ldp x26, x19, [sp, #48] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldp x29, x30, [sp], #64 // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: .cfi_def_cfa_offset 0 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore w19 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore w26 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore w27 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore w28 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore vg +; CHECK1024-SPLITSVE-NEXT: .cfi_restore w30 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore w29 +; CHECK1024-SPLITSVE-NEXT: ret entry: tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2 %call = call ptr @memset(ptr noundef nonnull %P1, i32 noundef 45, i32 noundef 37) @@ -2505,138 +2588,267 @@ define i32 @svecc_alloca_call(<4 x i16> %P0, ptr %P1, i32 %P2, <vscale x 16 x i8 ; CHECK64-NEXT: .cfi_restore w29 ; CHECK64-NEXT: ret ; -; CHECK1024-LABEL: svecc_alloca_call: -; CHECK1024: // %bb.0: // %entry -; CHECK1024-NEXT: sub sp, sp, #1088 -; CHECK1024-NEXT: .cfi_def_cfa_offset 1088 -; CHECK1024-NEXT: cntd x9 -; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill -; CHECK1024-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill -; CHECK1024-NEXT: str x9, [sp, #1040] // 8-byte Folded Spill -; CHECK1024-NEXT: str x28, [sp, #1048] // 8-byte Folded Spill -; CHECK1024-NEXT: str x27, [sp, #1056] // 8-byte Folded Spill -; CHECK1024-NEXT: str x26, [sp, #1064] // 8-byte Folded Spill -; CHECK1024-NEXT: str x19, [sp, #1072] // 8-byte Folded Spill -; CHECK1024-NEXT: add x29, sp, #1024 -; CHECK1024-NEXT: .cfi_def_cfa w29, 64 -; CHECK1024-NEXT: .cfi_offset w19, -16 -; CHECK1024-NEXT: .cfi_offset w26, -24 -; CHECK1024-NEXT: .cfi_offset w27, -32 -; CHECK1024-NEXT: .cfi_offset w28, -40 -; CHECK1024-NEXT: 
.cfi_offset vg, -48 -; CHECK1024-NEXT: .cfi_offset w30, -56 -; CHECK1024-NEXT: .cfi_offset w29, -64 -; CHECK1024-NEXT: addvl sp, sp, #-18 -; CHECK1024-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK1024-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK1024-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK1024-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK1024-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; CHECK1024-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; CHECK1024-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; CHECK1024-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill -; CHECK1024-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; CHECK1024-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; CHECK1024-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; CHECK1024-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill -; CHECK1024-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill -; 
CHECK1024-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: .cfi_escape 0x10, 0x48, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x78, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d8 @ cfa - 8 * IncomingVG - 1088 -; CHECK1024-NEXT: .cfi_escape 0x10, 0x49, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x70, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d9 @ cfa - 16 * IncomingVG - 1088 -; CHECK1024-NEXT: .cfi_escape 0x10, 0x4a, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x68, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d10 @ cfa - 24 * IncomingVG - 1088 -; CHECK1024-NEXT: .cfi_escape 0x10, 0x4b, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x60, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d11 @ cfa - 32 * IncomingVG - 1088 -; CHECK1024-NEXT: .cfi_escape 0x10, 0x4c, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x58, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d12 @ cfa - 40 * IncomingVG - 1088 -; CHECK1024-NEXT: .cfi_escape 0x10, 0x4d, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x50, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d13 @ cfa - 48 * IncomingVG - 1088 -; CHECK1024-NEXT: .cfi_escape 0x10, 0x4e, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x48, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d14 @ cfa - 56 * IncomingVG - 1088 -; CHECK1024-NEXT: .cfi_escape 0x10, 0x4f, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x40, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d15 @ cfa - 64 * IncomingVG - 1088 -; CHECK1024-NEXT: sub sp, sp, #1072 -; CHECK1024-NEXT: bl __arm_sme_state -; CHECK1024-NEXT: mov x19, x0 -; CHECK1024-NEXT: //APP -; CHECK1024-NEXT: //NO_APP -; CHECK1024-NEXT: tbz w19, #0, .LBB29_2 -; CHECK1024-NEXT: // %bb.1: // %entry -; CHECK1024-NEXT: smstop sm -; CHECK1024-NEXT: .LBB29_2: // %entry -; CHECK1024-NEXT: mov x0, sp -; CHECK1024-NEXT: mov w1, #45 // =0x2d -; CHECK1024-NEXT: mov w2, #37 // =0x25 -; CHECK1024-NEXT: bl memset -; CHECK1024-NEXT: tbz w19, #0, .LBB29_4 -; CHECK1024-NEXT: // %bb.3: // %entry -; CHECK1024-NEXT: smstart sm -; CHECK1024-NEXT: .LBB29_4: // %entry -; CHECK1024-NEXT: 
mov w0, #22647 // =0x5877 -; CHECK1024-NEXT: movk w0, #59491, lsl #16 -; CHECK1024-NEXT: add sp, sp, #1072 -; CHECK1024-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK1024-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK1024-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK1024-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK1024-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK1024-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK1024-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK1024-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK1024-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK1024-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK1024-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; 
CHECK1024-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload -; CHECK1024-NEXT: addvl sp, sp, #18 -; CHECK1024-NEXT: .cfi_restore z8 -; CHECK1024-NEXT: .cfi_restore z9 -; CHECK1024-NEXT: .cfi_restore z10 -; CHECK1024-NEXT: .cfi_restore z11 -; CHECK1024-NEXT: .cfi_restore z12 -; CHECK1024-NEXT: .cfi_restore z13 -; CHECK1024-NEXT: .cfi_restore z14 -; CHECK1024-NEXT: .cfi_restore z15 -; CHECK1024-NEXT: .cfi_def_cfa wsp, 1088 -; CHECK1024-NEXT: ldr x19, [sp, #1072] // 8-byte Folded Reload -; CHECK1024-NEXT: ldr x26, [sp, #1064] // 8-byte Folded Reload -; CHECK1024-NEXT: ldr x27, [sp, #1056] // 8-byte Folded Reload -; CHECK1024-NEXT: ldr x28, [sp, #1048] // 8-byte Folded Reload -; CHECK1024-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload -; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload -; CHECK1024-NEXT: add sp, sp, #1088 -; CHECK1024-NEXT: .cfi_def_cfa_offset 0 -; CHECK1024-NEXT: .cfi_restore w19 -; CHECK1024-NEXT: .cfi_restore w26 -; CHECK1024-NEXT: .cfi_restore w27 -; CHECK1024-NEXT: .cfi_restore w28 -; CHECK1024-NEXT: .cfi_restore vg -; CHECK1024-NEXT: .cfi_restore w30 -; CHECK1024-NEXT: .cfi_restore w29 -; CHECK1024-NEXT: ret +; CHECK1024-NOSPLITSVE-LABEL: svecc_alloca_call: +; CHECK1024-NOSPLITSVE: // %bb.0: // %entry +; CHECK1024-NOSPLITSVE-NEXT: sub sp, sp, #1088 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_def_cfa_offset 1088 +; CHECK1024-NOSPLITSVE-NEXT: cntd x9 +; CHECK1024-NOSPLITSVE-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str x9, [sp, #1040] // 8-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str x28, [sp, #1048] // 8-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str x27, [sp, #1056] // 8-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str x26, [sp, #1064] // 8-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str x19, [sp, #1072] // 8-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: add x29, sp, #1024 +; CHECK1024-NOSPLITSVE-NEXT: 
.cfi_def_cfa w29, 64 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w19, -16 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w26, -24 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w27, -32 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w28, -40 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset vg, -48 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w30, -56 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w29, -64 +; CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #-18 +; CHECK1024-NOSPLITSVE-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z16, [sp, #9, mul vl] // 
16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x48, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x78, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d8 @ cfa - 8 * IncomingVG - 1088 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x49, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x70, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d9 @ cfa - 16 * IncomingVG - 1088 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x4a, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x68, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d10 @ cfa - 24 * IncomingVG - 1088 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x4b, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x60, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d11 @ cfa - 32 * IncomingVG - 1088 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x4c, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x58, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d12 @ cfa - 40 * IncomingVG - 1088 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x4d, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x50, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d13 @ cfa - 48 * IncomingVG - 1088 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x4e, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x48, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d14 @ cfa - 56 * IncomingVG - 1088 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x4f, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x40, 
0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d15 @ cfa - 64 * IncomingVG - 1088 +; CHECK1024-NOSPLITSVE-NEXT: sub sp, sp, #1072 +; CHECK1024-NOSPLITSVE-NEXT: bl __arm_sme_state +; CHECK1024-NOSPLITSVE-NEXT: mov x19, x0 +; CHECK1024-NOSPLITSVE-NEXT: //APP +; CHECK1024-NOSPLITSVE-NEXT: //NO_APP +; CHECK1024-NOSPLITSVE-NEXT: tbz w19, #0, .LBB29_2 +; CHECK1024-NOSPLITSVE-NEXT: // %bb.1: // %entry +; CHECK1024-NOSPLITSVE-NEXT: smstop sm +; CHECK1024-NOSPLITSVE-NEXT: .LBB29_2: // %entry +; CHECK1024-NOSPLITSVE-NEXT: mov x0, sp +; CHECK1024-NOSPLITSVE-NEXT: mov w1, #45 // =0x2d +; CHECK1024-NOSPLITSVE-NEXT: mov w2, #37 // =0x25 +; CHECK1024-NOSPLITSVE-NEXT: bl memset +; CHECK1024-NOSPLITSVE-NEXT: tbz w19, #0, .LBB29_4 +; CHECK1024-NOSPLITSVE-NEXT: // %bb.3: // %entry +; CHECK1024-NOSPLITSVE-NEXT: smstart sm +; CHECK1024-NOSPLITSVE-NEXT: .LBB29_4: // %entry +; CHECK1024-NOSPLITSVE-NEXT: mov w0, #22647 // =0x5877 +; CHECK1024-NOSPLITSVE-NEXT: movk w0, #59491, lsl #16 +; CHECK1024-NOSPLITSVE-NEXT: add sp, sp, #1072 +; CHECK1024-NOSPLITSVE-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; 
CHECK1024-NOSPLITSVE-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #18 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z8 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z9 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z10 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z11 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z12 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z13 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z14 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z15 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_def_cfa wsp, 1088 +; CHECK1024-NOSPLITSVE-NEXT: ldr x19, [sp, #1072] // 8-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr x26, [sp, #1064] // 8-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr x27, [sp, #1056] // 8-byte Folded Reload +; 
CHECK1024-NOSPLITSVE-NEXT: ldr x28, [sp, #1048] // 8-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: add sp, sp, #1088 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_def_cfa_offset 0 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore w19 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore w26 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore w27 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore w28 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore vg +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore w30 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore w29 +; CHECK1024-NOSPLITSVE-NEXT: ret +; +; CHECK1024-SPLITSVE-LABEL: svecc_alloca_call: +; CHECK1024-SPLITSVE: // %bb.0: // %entry +; CHECK1024-SPLITSVE-NEXT: stp x29, x30, [sp, #-64]! // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: .cfi_def_cfa_offset 64 +; CHECK1024-SPLITSVE-NEXT: cntd x9 +; CHECK1024-SPLITSVE-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str x9, [sp, #16] // 8-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: stp x26, x19, [sp, #48] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: mov x29, sp +; CHECK1024-SPLITSVE-NEXT: .cfi_def_cfa w29, 64 +; CHECK1024-SPLITSVE-NEXT: .cfi_offset w19, -8 +; CHECK1024-SPLITSVE-NEXT: .cfi_offset w26, -16 +; CHECK1024-SPLITSVE-NEXT: .cfi_offset w27, -24 +; CHECK1024-SPLITSVE-NEXT: .cfi_offset w28, -32 +; CHECK1024-SPLITSVE-NEXT: .cfi_offset vg, -48 +; CHECK1024-SPLITSVE-NEXT: .cfi_offset w30, -56 +; CHECK1024-SPLITSVE-NEXT: .cfi_offset w29, -64 +; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #-2 +; CHECK1024-SPLITSVE-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p11, [sp, #8, mul 
vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: sub sp, sp, #1024 +; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #-16 +; CHECK1024-SPLITSVE-NEXT: str z23, [sp] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z22, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z21, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z20, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z19, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z18, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z17, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z16, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z15, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z14, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z13, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z12, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z11, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z10, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z9, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z8, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x48, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x68, 
0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d8 @ cfa - 24 * IncomingVG - 1088 +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x49, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x60, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d9 @ cfa - 32 * IncomingVG - 1088 +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x4a, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x58, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d10 @ cfa - 40 * IncomingVG - 1088 +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x4b, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x50, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d11 @ cfa - 48 * IncomingVG - 1088 +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x4c, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x48, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d12 @ cfa - 56 * IncomingVG - 1088 +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x4d, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x40, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d13 @ cfa - 64 * IncomingVG - 1088 +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x4e, 0x0e, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0xb8, 0x7f, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d14 @ cfa - 72 * IncomingVG - 1088 +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x4f, 0x0e, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0xb0, 0x7f, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d15 @ cfa - 80 * IncomingVG - 1088 +; CHECK1024-SPLITSVE-NEXT: sub sp, sp, #1072 +; CHECK1024-SPLITSVE-NEXT: bl __arm_sme_state +; CHECK1024-SPLITSVE-NEXT: mov x19, x0 +; CHECK1024-SPLITSVE-NEXT: //APP +; CHECK1024-SPLITSVE-NEXT: //NO_APP +; CHECK1024-SPLITSVE-NEXT: tbz w19, #0, .LBB29_2 +; CHECK1024-SPLITSVE-NEXT: // %bb.1: // %entry +; CHECK1024-SPLITSVE-NEXT: smstop sm +; CHECK1024-SPLITSVE-NEXT: .LBB29_2: // %entry +; CHECK1024-SPLITSVE-NEXT: mov x0, sp +; CHECK1024-SPLITSVE-NEXT: mov w1, #45 // =0x2d +; CHECK1024-SPLITSVE-NEXT: mov w2, #37 // =0x25 +; CHECK1024-SPLITSVE-NEXT: bl memset +; CHECK1024-SPLITSVE-NEXT: tbz w19, #0, .LBB29_4 +; CHECK1024-SPLITSVE-NEXT: // %bb.3: // %entry +; CHECK1024-SPLITSVE-NEXT: smstart 
sm +; CHECK1024-SPLITSVE-NEXT: .LBB29_4: // %entry +; CHECK1024-SPLITSVE-NEXT: mov w0, #22647 // =0x5877 +; CHECK1024-SPLITSVE-NEXT: movk w0, #59491, lsl #16 +; CHECK1024-SPLITSVE-NEXT: add sp, sp, #1072 +; CHECK1024-SPLITSVE-NEXT: ldr z23, [sp] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z22, [sp, #1, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z21, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z20, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z19, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z18, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z17, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z16, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z15, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z14, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z13, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z12, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z8, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: add sp, sp, #1024 +; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #16 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore z8 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore z9 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore z10 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore z11 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore z12 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore z13 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore z14 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore z15 +; CHECK1024-SPLITSVE-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; 
CHECK1024-SPLITSVE-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #2 +; CHECK1024-SPLITSVE-NEXT: .cfi_def_cfa wsp, 64 +; CHECK1024-SPLITSVE-NEXT: ldp x26, x19, [sp, #48] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldp x29, x30, [sp], #64 // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: .cfi_def_cfa_offset 0 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore w19 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore w26 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore w27 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore w28 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore vg +; CHECK1024-SPLITSVE-NEXT: .cfi_restore w30 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore w29 +; CHECK1024-SPLITSVE-NEXT: ret entry: tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2 |