diff options
Diffstat (limited to 'llvm/test/CodeGen/AArch64/split-sve-stack-frame-layout.ll')
| -rw-r--r-- | llvm/test/CodeGen/AArch64/split-sve-stack-frame-layout.ll | 534 |
1 files changed, 407 insertions, 127 deletions
diff --git a/llvm/test/CodeGen/AArch64/split-sve-stack-frame-layout.ll b/llvm/test/CodeGen/AArch64/split-sve-stack-frame-layout.ll index 690a39d..492f73e 100644 --- a/llvm/test/CodeGen/AArch64/split-sve-stack-frame-layout.ll +++ b/llvm/test/CodeGen/AArch64/split-sve-stack-frame-layout.ll @@ -19,20 +19,16 @@ define void @zpr_and_ppr_local(<vscale x 16 x i1> %pred, <vscale x 16 x i8> %vec ; CHECK-LABEL: zpr_and_ppr_local: ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: sub sp, sp, #1024 -; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: sub sp, sp, #1024 -; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: sub sp, sp, #2048 +; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0x90, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 2064 + 16 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: add x8, sp, #2048 ; CHECK-NEXT: str p0, [x8, #15, mul vl] ; CHECK-NEXT: add x8, sp, #1024 ; CHECK-NEXT: str z0, [x8] -; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: add sp, sp, #1024 -; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: add sp, sp, #1024 +; CHECK-NEXT: add sp, sp, #2048 +; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %ppr_local = alloca <vscale x 16 x i1> @@ -62,20 +58,16 @@ define void @zpr_and_ppr_local_fp(<vscale x 16 x i1> %pred, <vscale x 16 x i8> % ; CHECK: // %bb.0: ; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill ; CHECK-NEXT: mov x29, sp -; CHECK-NEXT: sub sp, sp, #1024 -; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: sub sp, sp, #1024 -; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: sub sp, sp, #2048 +; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_def_cfa w29, 16 ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: sub x8, x29, #1024 ; CHECK-NEXT: str p0, [x29, #-1, mul vl] ; CHECK-NEXT: str z0, [x8, #-2, mul vl] -; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: add sp, sp, #1024 -; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: add sp, sp, #1024 +; CHECK-NEXT: add sp, sp, #2048 +; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret %ppr_local = alloca <vscale x 16 x i1> @@ -103,17 +95,15 @@ define void @fpr_and_ppr_local(<vscale x 16 x i1> %pred, double %double) "aarch6 ; CHECK-LABEL: fpr_and_ppr_local: ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: sub sp, sp, #1024 +; CHECK-NEXT: sub sp, sp, #2064 ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: sub sp, sp, #1040 ; CHECK-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 2080 + 8 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: add x8, sp, #2064 ; CHECK-NEXT: str p0, [x8, #7, mul vl] ; CHECK-NEXT: str d0, [sp, #1032] -; CHECK-NEXT: add sp, sp, #1024 +; CHECK-NEXT: add sp, sp, #2064 ; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: add sp, sp, #1040 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %ppr_local = alloca <vscale x 16 x i1> @@ -144,17 +134,15 @@ define void @fpr_and_ppr_local_fp(<vscale x 16 x i1> %pred, double %double) "aar ; CHECK: // %bb.0: ; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill ; CHECK-NEXT: mov x29, sp -; CHECK-NEXT: sub sp, sp, #1024 +; CHECK-NEXT: sub sp, sp, #2064 ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: sub sp, sp, #1040 ; CHECK-NEXT: .cfi_def_cfa w29, 16 ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: str p0, [x29, #-1, mul vl] ; CHECK-NEXT: str d0, [sp, #1032] -; CHECK-NEXT: add sp, sp, #1024 +; CHECK-NEXT: add sp, sp, #2064 ; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: add sp, sp, #1040 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret %ppr_local = alloca <vscale x 16 x i1> @@ -314,18 +302,18 @@ define void @all_stack_areas(<vscale x 16 x i1> %pred, double %fp) { ; CHECK: // %bb.0: ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Spill +; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Spill +; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Spill +; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Spill +; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Spill +; CHECK-NEXT: 
str p8, [sp, #11, mul vl] // 2-byte Spill +; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Spill +; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Spill ; CHECK-NEXT: sub sp, sp, #1024 ; CHECK-NEXT: addvl sp, sp, #-17 ; CHECK-NEXT: str z23, [sp] // 16-byte Folded Spill @@ -387,18 +375,18 @@ define void @all_stack_areas(<vscale x 16 x i1> %pred, double %fp) { ; CHECK-NEXT: ldr z8, [sp, #15, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #1024 ; CHECK-NEXT: addvl sp, sp, #17 -; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p5, [sp, #14, 
mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret @@ -471,21 +459,21 @@ define void @all_stack_areas_fp(<vscale x 16 x i1> %pred, double %fp) "frame-poi ; CHECK-LABEL: all_stack_areas_fp: ; CHECK: // %bb.0: ; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill -; CHECK-NEXT: str x28, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x28, [sp, #16] // 8-byte Spill ; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Spill +; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Spill +; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Spill +; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Spill +; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Spill +; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Spill +; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Spill +; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, 
[sp, #15, mul vl] // 2-byte Spill ; CHECK-NEXT: sub sp, sp, #1024 ; CHECK-NEXT: addvl sp, sp, #-17 ; CHECK-NEXT: str z23, [sp] // 16-byte Folded Spill @@ -546,20 +534,20 @@ define void @all_stack_areas_fp(<vscale x 16 x i1> %pred, double %fp) "frame-poi ; CHECK-NEXT: ldr z8, [sp, #15, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #1024 ; CHECK-NEXT: addvl sp, sp, #17 -; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Reload ; CHECK-NEXT: addvl sp, sp, #2 -; CHECK-NEXT: ldr x28, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x28, [sp, #16] // 8-byte Reload 
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-NEXT: ret %ppr_local = alloca <vscale x 16 x i1> @@ -619,7 +607,7 @@ define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, <vscale x 16 x i8> %P3, ; CHECK-NEXT: .cfi_def_cfa_offset 64 ; CHECK-NEXT: cntd x9 ; CHECK-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: str x9, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: str x9, [sp, #16] // 8-byte Spill ; CHECK-NEXT: stp x26, x19, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: .cfi_def_cfa w29, 64 @@ -631,18 +619,18 @@ define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, <vscale x 16 x i8> %P3, ; CHECK-NEXT: .cfi_offset w30, -56 ; CHECK-NEXT: .cfi_offset w29, -64 ; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Spill +; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Spill +; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Spill +; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Spill +; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Spill +; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Spill +; 
CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Spill +; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Spill ; CHECK-NEXT: sub sp, sp, #1024 ; CHECK-NEXT: addvl sp, sp, #-16 ; CHECK-NEXT: str z23, [sp] // 16-byte Folded Spill @@ -716,18 +704,18 @@ define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, <vscale x 16 x i8> %P3, ; CHECK-NEXT: .cfi_restore z13 ; CHECK-NEXT: .cfi_restore z14 ; CHECK-NEXT: .cfi_restore z15 -; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 
2-byte Reload ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: .cfi_def_cfa wsp, 64 ; CHECK-NEXT: ldp x26, x19, [sp, #48] // 16-byte Folded Reload @@ -749,36 +737,23 @@ entry: } declare ptr @memset(ptr, i32, i32) -; FIXME: aarch64-split-sve-objects is currently not supported in this function -; as it requires stack reealignment (for the 32-byte aligned alloca). -; GPR CSRs -; <hazard padding> -; FPR CSRs -; <hazrd padding> -; <SVE locals (PPRs and ZPRs)> <--- hazard between PPRs and ZPRs here! -; <realignment padding> -; -> sp define void @zpr_and_ppr_local_realignment(<vscale x 16 x i1> %pred, <vscale x 16 x i8> %vector, i64 %gpr) "aarch64_pstate_sm_compatible" { ; CHECK-LABEL: zpr_and_ppr_local_realignment: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #1040 -; CHECK-NEXT: sub x9, sp, #1040 -; CHECK-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill -; CHECK-NEXT: add x29, sp, #1024 +; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-NEXT: sub x9, sp, #2064 +; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: addvl x9, x9, #-2 -; CHECK-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill ; CHECK-NEXT: and sp, x9, #0xffffffffffffffe0 ; CHECK-NEXT: .cfi_def_cfa w29, 16 ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: sub x8, x29, #1024 -; CHECK-NEXT: str p0, [x8, #-1, mul vl] +; CHECK-NEXT: str p0, [x29, #-1, mul vl] ; CHECK-NEXT: str z0, [x8, #-2, mul vl] ; CHECK-NEXT: str x0, [sp] -; CHECK-NEXT: sub sp, x29, #1024 -; CHECK-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload -; CHECK-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload -; CHECK-NEXT: add sp, sp, #1040 +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret %ppr_local = alloca <vscale x 16 x i1> %zpr_local = alloca <vscale x 16 x i8> @@ -793,11 +768,8 @@ define void @zpr_and_ppr_local_stack_probing(<vscale x 16 x i1> %pred, <vscale x ; CHECK-LABEL: zpr_and_ppr_local_stack_probing: ; CHECK: // %bb.0: ; CHECK-NEXT: 
str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: sub sp, sp, #1024 -; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str xzr, [sp] -; CHECK-NEXT: sub sp, sp, #1824 -; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: sub sp, sp, #2848 +; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: str xzr, [sp] ; CHECK-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xb0, 0x16, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 2864 + 16 * VG ; CHECK-NEXT: .cfi_offset w29, -16 @@ -806,10 +778,8 @@ define void @zpr_and_ppr_local_stack_probing(<vscale x 16 x i1> %pred, <vscale x ; CHECK-NEXT: add x8, sp, #1824 ; CHECK-NEXT: str z0, [x8] ; CHECK-NEXT: str x0, [sp] -; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: add sp, sp, #1024 -; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: add sp, sp, #1824 +; CHECK-NEXT: add sp, sp, #2848 +; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret "probe-stack"="inline-asm" "stack-probe-size"="4096" "frame-pointer"="none" "aarch64_pstate_sm_compatible" @@ -822,3 +792,313 @@ define void @zpr_and_ppr_local_stack_probing(<vscale x 16 x i1> %pred, <vscale x store volatile i64 %gpr, ptr %gpr_local ret void } + +; Only PPR callee-saves + a VLA +; Expect: No hazard padding. Frame pointer (x29), p4-p6 callee saves allocated +; with `addvl #-1`, PPR saves restored using frame pointer `addvl sp, x29, #-1`. +define aarch64_sve_vector_pcs void @only_ppr_csr_vla(i64 %n) { +; CHECK-LABEL: only_ppr_csr_vla: +; CHECK: // %bb.0: +; CHECK-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill +; CHECK-NEXT: mov x19, sp +; CHECK-NEXT: .cfi_def_cfa w29, 32 +; CHECK-NEXT: .cfi_offset w19, -16 +; CHECK-NEXT: .cfi_offset w30, -24 +; CHECK-NEXT: .cfi_offset w29, -32 +; CHECK-NEXT: add x9, x0, #15 +; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0 +; CHECK-NEXT: sub x8, x8, x9 +; CHECK-NEXT: mov sp, x8 +; CHECK-NEXT: // fake_use: $x8 +; CHECK-NEXT: //APP +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: addvl sp, x29, #-1 +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Reload +; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: ret + %alloc = alloca i8, i64 %n, align 1 + call void (...) @llvm.fake.use(ptr %alloc) + tail call void asm sideeffect "", "~{p4},~{p5},~{p6}"() + ret void +} + +; Only ZPR callee-saves + a VLA +; Expect: Hazard padding, Frame pointer (x29), z8-z10 callee saves allocated +; with `addvl #-3`. ZPR saves restored from `FP - 1024 + addvl #-3`. +define aarch64_sve_vector_pcs void @only_zpr_csr_vla(i64 %n) { +; CHECK-LABEL: only_zpr_csr_vla: +; CHECK: // %bb.0: +; CHECK-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: sub sp, sp, #1024 +; CHECK-NEXT: addvl sp, sp, #-3 +; CHECK-NEXT: str z10, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: sub sp, sp, #1024 +; CHECK-NEXT: mov x19, sp +; CHECK-NEXT: .cfi_def_cfa w29, 32 +; CHECK-NEXT: .cfi_offset w19, -16 +; CHECK-NEXT: .cfi_offset w30, -24 +; CHECK-NEXT: .cfi_offset w29, -32 +; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d8 @ cfa - 8 * VG - 1056 +; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x70, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d9 @ cfa - 16 * VG - 1056 +; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x68, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d10 @ cfa - 24 * VG - 1056 +; CHECK-NEXT: add x9, x0, #15 +; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0 +; CHECK-NEXT: sub x8, x8, x9 +; CHECK-NEXT: mov sp, x8 +; CHECK-NEXT: // fake_use: $x8 +; CHECK-NEXT: sub x8, x29, #1024 +; CHECK-NEXT: //APP +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: addvl sp, x8, #-3 +; CHECK-NEXT: ldr z10, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Reload +; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: ret + %alloc = alloca i8, i64 %n, align 1 + call void (...) @llvm.fake.use(ptr %alloc) + tail call void asm sideeffect "", "~{z8},~{z9},~{z10}"() + ret void +} + +; PPR+ZPR callee-saves + a VLA +; Expect: Hazard padding, Frame pointer (x29), PPR (p4-p6) and ZPR (z8-z10) +; callee-saves allocated separately, with hazard padding of 1024 between the +; areas. 
ZPR callee saves restored by `FP - 1024 + addvl #-4`, PPR callee saves +; restored by `FP + addvl #-1`. +define aarch64_sve_vector_pcs void @zpr_ppr_csr_vla(i64 %n) { +; CHECK-LABEL: zpr_ppr_csr_vla: +; CHECK: // %bb.0: +; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill +; CHECK-NEXT: sub sp, sp, #1024 +; CHECK-NEXT: addvl sp, sp, #-3 +; CHECK-NEXT: str z10, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: sub sp, sp, #1024 +; CHECK-NEXT: mov x19, sp +; CHECK-NEXT: .cfi_def_cfa w29, 32 +; CHECK-NEXT: .cfi_offset w19, -16 +; CHECK-NEXT: .cfi_offset w30, -24 +; CHECK-NEXT: .cfi_offset w29, -32 +; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x70, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d8 @ cfa - 16 * VG - 1056 +; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x68, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d9 @ cfa - 24 * VG - 1056 +; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x60, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d10 @ cfa - 32 * VG - 1056 +; CHECK-NEXT: add x9, x0, #15 +; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0 +; CHECK-NEXT: sub x8, x8, x9 +; CHECK-NEXT: mov sp, x8 +; CHECK-NEXT: // fake_use: $x8 +; CHECK-NEXT: sub x8, x29, #1024 +; CHECK-NEXT: //APP +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: addvl sp, x8, #-4 +; CHECK-NEXT: ldr z10, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: addvl sp, x29, #-1 +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 
2-byte Reload +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Reload +; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: ret + %alloc = alloca i8, i64 %n, align 1 + call void (...) @llvm.fake.use(ptr %alloc) + tail call void asm sideeffect "", "~{p4},~{p5},~{p6},~{z8},~{z9},~{z10}"() + ret void +} + +; Only PPR callee-saves (and ZPR/PPR locals) + a VLA +; Expect: Hazard padding, Frame pointer (x29), PPR (p4-p6) callee-saves, with +; hazard padding after the PPR callee saves (1024) and after the FPR local area +; (1024) -- coalesced to 2048. Only PPRs restored by moving the SP to +; `FP + addvl #-1`. +define void @sve_locals_only_ppr_csr_vla(i64 %n, <vscale x 16 x i1> %pred, <vscale x 16 x i8> %vector) { +; CHECK-LABEL: sve_locals_only_ppr_csr_vla: +; CHECK: // %bb.0: +; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill +; CHECK-NEXT: sub sp, sp, #2048 +; CHECK-NEXT: addvl sp, sp, #-2 +; CHECK-NEXT: mov x19, sp +; CHECK-NEXT: .cfi_def_cfa w29, 32 +; CHECK-NEXT: .cfi_offset w19, -16 +; CHECK-NEXT: .cfi_offset w30, -24 +; CHECK-NEXT: .cfi_offset w29, -32 +; CHECK-NEXT: add x9, x0, #15 +; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0 +; CHECK-NEXT: sub x8, x8, x9 +; CHECK-NEXT: mov sp, x8 +; CHECK-NEXT: // fake_use: $x8 +; CHECK-NEXT: sub x8, x29, #1024 +; CHECK-NEXT: //APP +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: str p0, [x29, #-9, mul vl] +; CHECK-NEXT: str z0, [x8, #-3, mul vl] +; CHECK-NEXT: addvl sp, x29, #-1 +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p5, [sp, #6, 
mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Reload +; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: ret + %alloc = alloca i8, i64 %n, align 1 + %ppr_local = alloca <vscale x 16 x i1> + %zpr_local = alloca <vscale x 16 x i8> + tail call void asm sideeffect "", "~{p4},~{p5},~{p6}"() + call void (...) @llvm.fake.use(ptr %alloc) + store volatile <vscale x 16 x i1> %pred, ptr %ppr_local + store volatile <vscale x 16 x i8> %vector, ptr %zpr_local + ret void +} + +; Only ZPR callee-saves (and ZPR/PPR locals) + a VLA +; Expect: Hazard padding, Frame pointer (x29), ZPR (z8-z10) callee-saves, with +; hazard padding before the ZPR callee saves (1024) and after the ZPR local area +; (1024). Only ZPRs restored by moving the SP to `FP - 1024 + addvl #-4`. +define void @sve_locals_only_zpr_csr_vla(i64 %n, <vscale x 16 x i1> %pred, <vscale x 16 x i8> %vector) { +; CHECK-LABEL: sve_locals_only_zpr_csr_vla: +; CHECK: // %bb.0: +; CHECK-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: sub sp, sp, #1024 +; CHECK-NEXT: addvl sp, sp, #-4 +; CHECK-NEXT: str z10, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: sub sp, sp, #1024 +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov x19, sp +; CHECK-NEXT: .cfi_def_cfa w29, 32 +; CHECK-NEXT: .cfi_offset w19, -16 +; CHECK-NEXT: .cfi_offset w30, -24 +; CHECK-NEXT: .cfi_offset w29, -32 +; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x70, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d8 @ cfa - 16 * VG - 1056 +; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x68, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d9 @ cfa - 24 * VG - 1056 +; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x60, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d10 @ cfa - 32 * VG - 1056 +; CHECK-NEXT: add x9, x0, #15 +; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0 +; CHECK-NEXT: sub x8, x8, x9 +; CHECK-NEXT: mov sp, x8 +; CHECK-NEXT: // fake_use: $x8 +; CHECK-NEXT: sub x8, x29, #1024 +; CHECK-NEXT: //APP +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: str p0, [x29, #-1, mul vl] +; CHECK-NEXT: str z0, [x8, #-5, mul vl] +; CHECK-NEXT: addvl sp, x8, #-4 +; CHECK-NEXT: ldr z10, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Reload +; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: ret + %alloc = alloca i8, i64 %n, align 1 + %ppr_local = alloca <vscale x 16 x i1> + %zpr_local = alloca <vscale x 16 x i8> + tail call void asm sideeffect "", "~{z8},~{z9},~{z10}"() + call void (...) 
@llvm.fake.use(ptr %alloc) + store volatile <vscale x 16 x i1> %pred, ptr %ppr_local + store volatile <vscale x 16 x i8> %vector, ptr %zpr_local + ret void +} + +; PPR+ZPR callee-saves (and ZPR/PPR locals) + a VLA +; Expect: Hazard padding, Frame pointer (x29), PPR (p4-p6) and ZPR (z8-z10) +; callee-saves, with hazard padding before the ZPR callee saves (1024) and after +; the ZPR local area (1024). ZPRs restored by moving the SP to +; `FP - 1024 + addvl #-5`, PPRs restored by moving SP to `FP + addvl #-1`. +define void @sve_locals_zpr_ppr_csr_vla(i64 %n, <vscale x 16 x i1> %pred, <vscale x 16 x i8> %vector) { +; CHECK-LABEL: sve_locals_zpr_ppr_csr_vla: +; CHECK: // %bb.0: +; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill +; CHECK-NEXT: sub sp, sp, #1024 +; CHECK-NEXT: addvl sp, sp, #-4 +; CHECK-NEXT: str z10, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: sub sp, sp, #1024 +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov x19, sp +; CHECK-NEXT: .cfi_def_cfa w29, 32 +; CHECK-NEXT: .cfi_offset w19, -16 +; CHECK-NEXT: .cfi_offset w30, -24 +; CHECK-NEXT: .cfi_offset w29, -32 +; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x68, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d8 @ cfa - 24 * VG - 1056 +; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x60, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d9 @ cfa - 32 * VG - 1056 +; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x58, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d10 @ cfa - 40 * VG - 1056 +; CHECK-NEXT: add x9, x0, #15 +; CHECK-NEXT: mov x8, sp +; 
CHECK-NEXT: and x9, x9, #0xfffffffffffffff0 +; CHECK-NEXT: sub x8, x8, x9 +; CHECK-NEXT: mov sp, x8 +; CHECK-NEXT: // fake_use: $x8 +; CHECK-NEXT: sub x8, x29, #1024 +; CHECK-NEXT: //APP +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: str p0, [x29, #-9, mul vl] +; CHECK-NEXT: str z0, [x8, #-6, mul vl] +; CHECK-NEXT: addvl sp, x8, #-5 +; CHECK-NEXT: ldr z10, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: addvl sp, x29, #-1 +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Reload +; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: ret + %alloc = alloca i8, i64 %n, align 1 + %ppr_local = alloca <vscale x 16 x i1> + %zpr_local = alloca <vscale x 16 x i8> + tail call void asm sideeffect "", "~{p4},~{p5},~{p6},~{z8},~{z9},~{z10}"() + call void (...) @llvm.fake.use(ptr %alloc) + store volatile <vscale x 16 x i1> %pred, ptr %ppr_local + store volatile <vscale x 16 x i8> %vector, ptr %zpr_local + ret void +} |
