; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64 -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme < %s | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK
; RUN: llc -mtriple=aarch64 -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme -aarch64-new-sme-abi < %s | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-NEWLOWERING

declare void @private_za_callee()
declare void @shared_za_callee() "aarch64_inout_za"
declare void @preserves_za_callee() "aarch64_preserves_za"
declare float @llvm.cos.f32(float)

; Test lazy-save mechanism for a single callee.
define void @test_lazy_save_1_callee() nounwind "aarch64_inout_za" {
; CHECK-COMMON-LABEL: test_lazy_save_1_callee:
; CHECK-COMMON:       // %bb.0:
; CHECK-COMMON-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-COMMON-NEXT:    str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-COMMON-NEXT:    mov x29, sp
; CHECK-COMMON-NEXT:    sub sp, sp, #16
; CHECK-COMMON-NEXT:    rdsvl x8, #1
; CHECK-COMMON-NEXT:    mov x9, sp
; CHECK-COMMON-NEXT:    msub x9, x8, x8, x9
; CHECK-COMMON-NEXT:    mov sp, x9
; CHECK-COMMON-NEXT:    sub x10, x29, #16
; CHECK-COMMON-NEXT:    stp x9, x8, [x29, #-16]
; CHECK-COMMON-NEXT:    msr TPIDR2_EL0, x10
; CHECK-COMMON-NEXT:    bl private_za_callee
; CHECK-COMMON-NEXT:    smstart za
; CHECK-COMMON-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-COMMON-NEXT:    sub x0, x29, #16
; CHECK-COMMON-NEXT:    cbnz x8, .LBB0_2
; CHECK-COMMON-NEXT:  // %bb.1:
; CHECK-COMMON-NEXT:    bl __arm_tpidr2_restore
; CHECK-COMMON-NEXT:  .LBB0_2:
; CHECK-COMMON-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-COMMON-NEXT:    mov sp, x29
; CHECK-COMMON-NEXT:    ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-COMMON-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-COMMON-NEXT:    ret
  call void @private_za_callee()
  ret void
}

; Test lazy-save mechanism for multiple callees.
define void @test_lazy_save_2_callees() nounwind "aarch64_inout_za" {
; CHECK-LABEL: test_lazy_save_2_callees:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    mov x29, sp
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    rdsvl x8, #1
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    msub x9, x8, x8, x9
; CHECK-NEXT:    mov sp, x9
; CHECK-NEXT:    sub x20, x29, #16
; CHECK-NEXT:    stp x9, x8, [x29, #-16]
; CHECK-NEXT:    msr TPIDR2_EL0, x20
; CHECK-NEXT:    bl private_za_callee
; CHECK-NEXT:    smstart za
; CHECK-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEXT:    sub x0, x29, #16
; CHECK-NEXT:    cbnz x8, .LBB1_2
; CHECK-NEXT:  // %bb.1:
; CHECK-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEXT:  .LBB1_2:
; CHECK-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEXT:    msr TPIDR2_EL0, x20
; CHECK-NEXT:    bl private_za_callee
; CHECK-NEXT:    smstart za
; CHECK-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEXT:    sub x0, x29, #16
; CHECK-NEXT:    cbnz x8, .LBB1_4
; CHECK-NEXT:  // %bb.3:
; CHECK-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEXT:  .LBB1_4:
; CHECK-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEXT:    mov sp, x29
; CHECK-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT:    ret
;
; CHECK-NEWLOWERING-LABEL: test_lazy_save_2_callees:
; CHECK-NEWLOWERING:       // %bb.0:
; CHECK-NEWLOWERING-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT:    str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEWLOWERING-NEXT:    mov x29, sp
; CHECK-NEWLOWERING-NEXT:    sub sp, sp, #16
; CHECK-NEWLOWERING-NEXT:    rdsvl x8, #1
; CHECK-NEWLOWERING-NEXT:    mov x9, sp
; CHECK-NEWLOWERING-NEXT:    msub x9, x8, x8, x9
; CHECK-NEWLOWERING-NEXT:    mov sp, x9
; CHECK-NEWLOWERING-NEXT:    sub x10, x29, #16
; CHECK-NEWLOWERING-NEXT:    stp x9, x8, [x29, #-16]
; CHECK-NEWLOWERING-NEXT:    msr TPIDR2_EL0, x10
; CHECK-NEWLOWERING-NEXT:    bl private_za_callee
; CHECK-NEWLOWERING-NEXT:    bl private_za_callee
; CHECK-NEWLOWERING-NEXT:    smstart za
; CHECK-NEWLOWERING-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT:    sub x0, x29, #16
; CHECK-NEWLOWERING-NEXT:    cbnz x8, .LBB1_2
; CHECK-NEWLOWERING-NEXT:  // %bb.1:
; CHECK-NEWLOWERING-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEWLOWERING-NEXT:  .LBB1_2:
; CHECK-NEWLOWERING-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT:    mov sp, x29
; CHECK-NEWLOWERING-NEXT:    ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEWLOWERING-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT:    ret
  call void @private_za_callee()
  call void @private_za_callee()
  ret void
}

; Test a call of an intrinsic that gets expanded to a library call.
define float @test_lazy_save_expanded_intrinsic(float %a) nounwind "aarch64_inout_za" {
; CHECK-COMMON-LABEL: test_lazy_save_expanded_intrinsic:
; CHECK-COMMON:       // %bb.0:
; CHECK-COMMON-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-COMMON-NEXT:    str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-COMMON-NEXT:    mov x29, sp
; CHECK-COMMON-NEXT:    sub sp, sp, #16
; CHECK-COMMON-NEXT:    rdsvl x8, #1
; CHECK-COMMON-NEXT:    mov x9, sp
; CHECK-COMMON-NEXT:    msub x9, x8, x8, x9
; CHECK-COMMON-NEXT:    mov sp, x9
; CHECK-COMMON-NEXT:    sub x10, x29, #16
; CHECK-COMMON-NEXT:    stp x9, x8, [x29, #-16]
; CHECK-COMMON-NEXT:    msr TPIDR2_EL0, x10
; CHECK-COMMON-NEXT:    bl cosf
; CHECK-COMMON-NEXT:    smstart za
; CHECK-COMMON-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-COMMON-NEXT:    sub x0, x29, #16
; CHECK-COMMON-NEXT:    cbnz x8, .LBB2_2
; CHECK-COMMON-NEXT:  // %bb.1:
; CHECK-COMMON-NEXT:    bl __arm_tpidr2_restore
; CHECK-COMMON-NEXT:  .LBB2_2:
; CHECK-COMMON-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-COMMON-NEXT:    mov sp, x29
; CHECK-COMMON-NEXT:    ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-COMMON-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-COMMON-NEXT:    ret
  %res = call float @llvm.cos.f32(float %a)
  ret float %res
}

; Test a combination of streaming-compatible -> normal call with lazy-save.
define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_inout_za" "aarch64_pstate_sm_compatible" {
; CHECK-LABEL: test_lazy_save_and_conditional_smstart:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    add x29, sp, #64
; CHECK-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    rdsvl x8, #1
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    mrs x20, SVCR
; CHECK-NEXT:    msub x9, x8, x8, x9
; CHECK-NEXT:    mov sp, x9
; CHECK-NEXT:    sub x10, x29, #80
; CHECK-NEXT:    stp x9, x8, [x29, #-80]
; CHECK-NEXT:    msr TPIDR2_EL0, x10
; CHECK-NEXT:    tbz w20, #0, .LBB3_2
; CHECK-NEXT:  // %bb.1:
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:  .LBB3_2:
; CHECK-NEXT:    bl private_za_callee
; CHECK-NEXT:    tbz w20, #0, .LBB3_4
; CHECK-NEXT:  // %bb.3:
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:  .LBB3_4:
; CHECK-NEXT:    smstart za
; CHECK-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEXT:    sub x0, x29, #80
; CHECK-NEXT:    cbnz x8, .LBB3_6
; CHECK-NEXT:  // %bb.5:
; CHECK-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEXT:  .LBB3_6:
; CHECK-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEXT:    sub sp, x29, #64
; CHECK-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT:    ret
;
; CHECK-NEWLOWERING-LABEL: test_lazy_save_and_conditional_smstart:
; CHECK-NEWLOWERING:       // %bb.0:
; CHECK-NEWLOWERING-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT:    add x29, sp, #64
; CHECK-NEWLOWERING-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT:    sub sp, sp, #16
; CHECK-NEWLOWERING-NEXT:    rdsvl x8, #1
; CHECK-NEWLOWERING-NEXT:    mov x9, sp
; CHECK-NEWLOWERING-NEXT:    msub x9, x8, x8, x9
; CHECK-NEWLOWERING-NEXT:    mov sp, x9
; CHECK-NEWLOWERING-NEXT:    stp x9, x8, [x29, #-80]
; CHECK-NEWLOWERING-NEXT:    mrs x20, SVCR
; CHECK-NEWLOWERING-NEXT:    sub x8, x29, #80
; CHECK-NEWLOWERING-NEXT:    msr TPIDR2_EL0, x8
; CHECK-NEWLOWERING-NEXT:    tbz w20, #0, .LBB3_2
; CHECK-NEWLOWERING-NEXT:  // %bb.1:
; CHECK-NEWLOWERING-NEXT:    smstop sm
; CHECK-NEWLOWERING-NEXT:  .LBB3_2:
; CHECK-NEWLOWERING-NEXT:    bl private_za_callee
; CHECK-NEWLOWERING-NEXT:    tbz w20, #0, .LBB3_4
; CHECK-NEWLOWERING-NEXT:  // %bb.3:
; CHECK-NEWLOWERING-NEXT:    smstart sm
; CHECK-NEWLOWERING-NEXT:  .LBB3_4:
; CHECK-NEWLOWERING-NEXT:    smstart za
; CHECK-NEWLOWERING-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT:    sub x0, x29, #80
; CHECK-NEWLOWERING-NEXT:    cbnz x8, .LBB3_6
; CHECK-NEWLOWERING-NEXT:  // %bb.5:
; CHECK-NEWLOWERING-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEWLOWERING-NEXT:  .LBB3_6:
; CHECK-NEWLOWERING-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT:    sub sp, x29, #64
; CHECK-NEWLOWERING-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT:    ret
  call void @private_za_callee()
  ret void
}

; Note: For the final @private_za_callee() we set up a lazy save but don't
; restore from it (since ZA is off on return). We could improve this case
; by turning ZA off before the final private ZA call.
define void @test_lazy_save_mixed_shared_and_private_callees() "aarch64_new_za"
; CHECK-LABEL: test_lazy_save_mixed_shared_and_private_callees:
; CHECK:       // %bb.0: // %prelude
; CHECK-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    mov x29, sp
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa w29, 32
; CHECK-NEXT:    .cfi_offset w19, -8
; CHECK-NEXT:    .cfi_offset w20, -16
; CHECK-NEXT:    .cfi_offset w30, -24
; CHECK-NEXT:    .cfi_offset w29, -32
; CHECK-NEXT:    rdsvl x8, #1
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    msub x9, x8, x8, x9
; CHECK-NEXT:    mov sp, x9
; CHECK-NEXT:    stp x9, x8, [x29, #-16]
; CHECK-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEXT:    cbz x8, .LBB4_2
; CHECK-NEXT:  // %bb.1: // %save.za
; CHECK-NEXT:    bl __arm_tpidr2_save
; CHECK-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEXT:  .LBB4_2:
; CHECK-NEXT:    smstart za
; CHECK-NEXT:    sub x20, x29, #16
; CHECK-NEXT:    zero {za}
; CHECK-NEXT:    msr TPIDR2_EL0, x20
; CHECK-NEXT:    bl private_za_callee
; CHECK-NEXT:    smstart za
; CHECK-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEXT:    sub x0, x29, #16
; CHECK-NEXT:    cbnz x8, .LBB4_4
; CHECK-NEXT:  // %bb.3:
; CHECK-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEXT:  .LBB4_4:
; CHECK-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEXT:    bl shared_za_callee
; CHECK-NEXT:    bl preserves_za_callee
; CHECK-NEXT:    msr TPIDR2_EL0, x20
; CHECK-NEXT:    bl private_za_callee
; CHECK-NEXT:    smstart za
; CHECK-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEXT:    sub x0, x29, #16
; CHECK-NEXT:    cbnz x8, .LBB4_6
; CHECK-NEXT:  // %bb.5:
; CHECK-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEXT:  .LBB4_6:
; CHECK-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEXT:    smstop za
; CHECK-NEXT:    mov sp, x29
; CHECK-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT:    ret
;
; CHECK-NEWLOWERING-LABEL: test_lazy_save_mixed_shared_and_private_callees:
; CHECK-NEWLOWERING:       // %bb.0:
; CHECK-NEWLOWERING-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT:    str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEWLOWERING-NEXT:    mov x29, sp
; CHECK-NEWLOWERING-NEXT:    sub sp, sp, #16
; CHECK-NEWLOWERING-NEXT:    .cfi_def_cfa w29, 32
; CHECK-NEWLOWERING-NEXT:    .cfi_offset w19, -16
; CHECK-NEWLOWERING-NEXT:    .cfi_offset w30, -24
; CHECK-NEWLOWERING-NEXT:    .cfi_offset w29, -32
; CHECK-NEWLOWERING-NEXT:    rdsvl x8, #1
; CHECK-NEWLOWERING-NEXT:    mov x9, sp
; CHECK-NEWLOWERING-NEXT:    msub x9, x8, x8, x9
; CHECK-NEWLOWERING-NEXT:    mov sp, x9
; CHECK-NEWLOWERING-NEXT:    stp x9, x8, [x29, #-16]
; CHECK-NEWLOWERING-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT:    cbz x8, .LBB4_2
; CHECK-NEWLOWERING-NEXT:  // %bb.1:
; CHECK-NEWLOWERING-NEXT:    bl __arm_tpidr2_save
; CHECK-NEWLOWERING-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT:    zero {za}
; CHECK-NEWLOWERING-NEXT:  .LBB4_2:
; CHECK-NEWLOWERING-NEXT:    smstart za
; CHECK-NEWLOWERING-NEXT:    sub x8, x29, #16
; CHECK-NEWLOWERING-NEXT:    msr TPIDR2_EL0, x8
; CHECK-NEWLOWERING-NEXT:    bl private_za_callee
; CHECK-NEWLOWERING-NEXT:    smstart za
; CHECK-NEWLOWERING-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT:    sub x0, x29, #16
; CHECK-NEWLOWERING-NEXT:    cbnz x8, .LBB4_4
; CHECK-NEWLOWERING-NEXT:  // %bb.3:
; CHECK-NEWLOWERING-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEWLOWERING-NEXT:  .LBB4_4:
; CHECK-NEWLOWERING-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT:    bl shared_za_callee
; CHECK-NEWLOWERING-NEXT:    bl preserves_za_callee
; CHECK-NEWLOWERING-NEXT:    sub x8, x29, #16
; CHECK-NEWLOWERING-NEXT:    msr TPIDR2_EL0, x8
; CHECK-NEWLOWERING-NEXT:    bl private_za_callee
; CHECK-NEWLOWERING-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT:    smstop za
; CHECK-NEWLOWERING-NEXT:    mov sp, x29
; CHECK-NEWLOWERING-NEXT:    ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEWLOWERING-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT:    ret
{
  call void @private_za_callee()
  call void @shared_za_callee()
  call void @preserves_za_callee()
  call void @private_za_callee()
  ret void
}

define void @test_many_back2back_private_za_calls() "aarch64_inout_za" {
; CHECK-LABEL: test_many_back2back_private_za_calls:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    mov x29, sp
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa w29, 32
; CHECK-NEXT:    .cfi_offset w19, -8
; CHECK-NEXT:    .cfi_offset w20, -16
; CHECK-NEXT:    .cfi_offset w30, -24
; CHECK-NEXT:    .cfi_offset w29, -32
; CHECK-NEXT:    rdsvl x8, #1
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    msub x9, x8, x8, x9
; CHECK-NEXT:    mov sp, x9
; CHECK-NEXT:    stp x9, x8, [x29, #-16]
; CHECK-NEXT:    bl shared_za_callee
; CHECK-NEXT:    sub x20, x29, #16
; CHECK-NEXT:    msr TPIDR2_EL0, x20
; CHECK-NEXT:    bl private_za_callee
; CHECK-NEXT:    smstart za
; CHECK-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEXT:    sub x0, x29, #16
; CHECK-NEXT:    cbnz x8, .LBB5_2
; CHECK-NEXT:  // %bb.1:
; CHECK-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEXT:  .LBB5_2:
; CHECK-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEXT:    msr TPIDR2_EL0, x20
; CHECK-NEXT:    bl private_za_callee
; CHECK-NEXT:    smstart za
; CHECK-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEXT:    sub x0, x29, #16
; CHECK-NEXT:    cbnz x8, .LBB5_4
; CHECK-NEXT:  // %bb.3:
; CHECK-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEXT:  .LBB5_4:
; CHECK-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEXT:    msr TPIDR2_EL0, x20
; CHECK-NEXT:    bl private_za_callee
; CHECK-NEXT:    smstart za
; CHECK-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEXT:    sub x0, x29, #16
; CHECK-NEXT:    cbnz x8, .LBB5_6
; CHECK-NEXT:  // %bb.5:
; CHECK-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEXT:  .LBB5_6:
; CHECK-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEXT:    msr TPIDR2_EL0, x20
; CHECK-NEXT:    bl private_za_callee
; CHECK-NEXT:    smstart za
; CHECK-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEXT:    sub x0, x29, #16
; CHECK-NEXT:    cbnz x8, .LBB5_8
; CHECK-NEXT:  // %bb.7:
; CHECK-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEXT:  .LBB5_8:
; CHECK-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEXT:    msr TPIDR2_EL0, x20
; CHECK-NEXT:    bl private_za_callee
; CHECK-NEXT:    smstart za
; CHECK-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEXT:    sub x0, x29, #16
; CHECK-NEXT:    cbnz x8, .LBB5_10
; CHECK-NEXT:  // %bb.9:
; CHECK-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEXT:  .LBB5_10:
; CHECK-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEXT:    msr TPIDR2_EL0, x20
; CHECK-NEXT:    bl private_za_callee
; CHECK-NEXT:    smstart za
; CHECK-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEXT:    sub x0, x29, #16
; CHECK-NEXT:    cbnz x8, .LBB5_12
; CHECK-NEXT:  // %bb.11:
; CHECK-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEXT:  .LBB5_12:
; CHECK-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEXT:    bl shared_za_callee
; CHECK-NEXT:    mov sp, x29
; CHECK-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT:    ret
;
; CHECK-NEWLOWERING-LABEL: test_many_back2back_private_za_calls:
; CHECK-NEWLOWERING:       // %bb.0:
; CHECK-NEWLOWERING-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT:    str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEWLOWERING-NEXT:    mov x29, sp
; CHECK-NEWLOWERING-NEXT:    sub sp, sp, #16
; CHECK-NEWLOWERING-NEXT:    .cfi_def_cfa w29, 32
; CHECK-NEWLOWERING-NEXT:    .cfi_offset w19, -16
; CHECK-NEWLOWERING-NEXT:    .cfi_offset w30, -24
; CHECK-NEWLOWERING-NEXT:    .cfi_offset w29, -32
; CHECK-NEWLOWERING-NEXT:    rdsvl x8, #1
; CHECK-NEWLOWERING-NEXT:    mov x9, sp
; CHECK-NEWLOWERING-NEXT:    msub x9, x8, x8, x9
; CHECK-NEWLOWERING-NEXT:    mov sp, x9
; CHECK-NEWLOWERING-NEXT:    stp x9, x8, [x29, #-16]
; CHECK-NEWLOWERING-NEXT:    bl shared_za_callee
; CHECK-NEWLOWERING-NEXT:    sub x8, x29, #16
; CHECK-NEWLOWERING-NEXT:    msr TPIDR2_EL0, x8
; CHECK-NEWLOWERING-NEXT:    bl private_za_callee
; CHECK-NEWLOWERING-NEXT:    bl private_za_callee
; CHECK-NEWLOWERING-NEXT:    bl private_za_callee
; CHECK-NEWLOWERING-NEXT:    bl private_za_callee
; CHECK-NEWLOWERING-NEXT:    bl private_za_callee
; CHECK-NEWLOWERING-NEXT:    bl private_za_callee
; CHECK-NEWLOWERING-NEXT:    smstart za
; CHECK-NEWLOWERING-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT:    sub x0, x29, #16
; CHECK-NEWLOWERING-NEXT:    cbnz x8, .LBB5_2
; CHECK-NEWLOWERING-NEXT:  // %bb.1:
; CHECK-NEWLOWERING-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEWLOWERING-NEXT:  .LBB5_2:
; CHECK-NEWLOWERING-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT:    bl shared_za_callee
; CHECK-NEWLOWERING-NEXT:    mov sp, x29
; CHECK-NEWLOWERING-NEXT:    ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEWLOWERING-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT:    ret
  call void @shared_za_callee()
  call void @private_za_callee()
  call void @private_za_callee()
  call void @private_za_callee()
  call void @private_za_callee()
  call void @private_za_callee()
  call void @private_za_callee()
  call void @shared_za_callee()
  ret void
}

define void @test_shared_private_shared() nounwind "aarch64_inout_za" {
; CHECK-COMMON-LABEL: test_shared_private_shared:
; CHECK-COMMON:       // %bb.0:
; CHECK-COMMON-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-COMMON-NEXT:    str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-COMMON-NEXT:    mov x29, sp
; CHECK-COMMON-NEXT:    sub sp, sp, #16
; CHECK-COMMON-NEXT:    rdsvl x8, #1
; CHECK-COMMON-NEXT:    mov x9, sp
; CHECK-COMMON-NEXT:    msub x9, x8, x8, x9
; CHECK-COMMON-NEXT:    mov sp, x9
; CHECK-COMMON-NEXT:    stp x9, x8, [x29, #-16]
; CHECK-COMMON-NEXT:    bl shared_za_callee
; CHECK-COMMON-NEXT:    sub x8, x29, #16
; CHECK-COMMON-NEXT:    msr TPIDR2_EL0, x8
; CHECK-COMMON-NEXT:    bl private_za_callee
; CHECK-COMMON-NEXT:    smstart za
; CHECK-COMMON-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-COMMON-NEXT:    sub x0, x29, #16
; CHECK-COMMON-NEXT:    cbnz x8, .LBB6_2
; CHECK-COMMON-NEXT:  // %bb.1:
; CHECK-COMMON-NEXT:    bl __arm_tpidr2_restore
; CHECK-COMMON-NEXT:  .LBB6_2:
; CHECK-COMMON-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-COMMON-NEXT:    bl shared_za_callee
; CHECK-COMMON-NEXT:    mov sp, x29
; CHECK-COMMON-NEXT:    ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-COMMON-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-COMMON-NEXT:    ret
  call void @shared_za_callee()
  call void @private_za_callee()
  call void @shared_za_callee()
  ret void
}

define void @test_only_shared_za() nounwind "aarch64_inout_za" {
; CHECK-COMMON-LABEL: test_only_shared_za:
; CHECK-COMMON:       // %bb.0:
; CHECK-COMMON-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-COMMON-NEXT:    bl shared_za_callee
; CHECK-COMMON-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-COMMON-NEXT:    ret
  call void @shared_za_callee()
  ret void
}

declare i64 @shared_za_callee_i64(i64) "aarch64_inout_za"
declare i64 @private_za_callee_i64(i64)

define i64 @test_shared_private_shared_i64(i64 %x) nounwind "aarch64_inout_za" {
; CHECK-COMMON-LABEL: test_shared_private_shared_i64:
; CHECK-COMMON:       // %bb.0:
; CHECK-COMMON-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-COMMON-NEXT:    str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-COMMON-NEXT:    mov x29, sp
; CHECK-COMMON-NEXT:    sub sp, sp, #16
; CHECK-COMMON-NEXT:    rdsvl x8, #1
; CHECK-COMMON-NEXT:    mov x9, sp
; CHECK-COMMON-NEXT:    msub x9, x8, x8, x9
; CHECK-COMMON-NEXT:    mov sp, x9
; CHECK-COMMON-NEXT:    stp x9, x8, [x29, #-16]
; CHECK-COMMON-NEXT:    bl shared_za_callee_i64
; CHECK-COMMON-NEXT:    sub x8, x29, #16
; CHECK-COMMON-NEXT:    msr TPIDR2_EL0, x8
; CHECK-COMMON-NEXT:    bl private_za_callee_i64
; CHECK-COMMON-NEXT:    mov x1, x0
; CHECK-COMMON-NEXT:    smstart za
; CHECK-COMMON-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-COMMON-NEXT:    sub x0, x29, #16
; CHECK-COMMON-NEXT:    cbnz x8, .LBB8_2
; CHECK-COMMON-NEXT:  // %bb.1:
; CHECK-COMMON-NEXT:    bl __arm_tpidr2_restore
; CHECK-COMMON-NEXT:  .LBB8_2:
; CHECK-COMMON-NEXT:    mov x0, x1
; CHECK-COMMON-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-COMMON-NEXT:    bl shared_za_callee_i64
; CHECK-COMMON-NEXT:    mov sp, x29
; CHECK-COMMON-NEXT:    ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-COMMON-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-COMMON-NEXT:    ret
  %a = call i64 @shared_za_callee_i64(i64 %x)
  %b = call i64 @private_za_callee_i64(i64 %a)
  %c = call i64 @shared_za_callee_i64(i64 %b)
  ret i64 %c
}

declare i64 @many_args_private_za_callee(
  i64, i64, i64, i64, i64, i64, i64, i64, i64, i64)

; In this example some arguments are passed on the stack, which decrements the
; stack pointer before the call -- in this test the lazy save should be set up
; before the stack decrement.
define i64 @test_many_callee_arguments(
; CHECK-LABEL: test_many_callee_arguments:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT:    str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT:    mov x29, sp
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    rdsvl x9, #1
; CHECK-NEXT:    msub x8, x9, x9, x8
; CHECK-NEXT:    mov sp, x8
; CHECK-NEXT:    ldp x10, x11, [x29, #32]
; CHECK-NEXT:    sub x12, x29, #16
; CHECK-NEXT:    stp x8, x9, [x29, #-16]
; CHECK-NEXT:    msr TPIDR2_EL0, x12
; CHECK-NEXT:    stp x10, x11, [sp, #-16]!
; CHECK-NEXT:    bl many_args_private_za_callee
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    mov x1, x0
; CHECK-NEXT:    smstart za
; CHECK-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEXT:    sub x0, x29, #16
; CHECK-NEXT:    cbnz x8, .LBB9_2
; CHECK-NEXT:  // %bb.1:
; CHECK-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEXT:  .LBB9_2:
; CHECK-NEXT:    mov x0, x1
; CHECK-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEXT:    mov sp, x29
; CHECK-NEXT:    ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT:    ret
;
; CHECK-NEWLOWERING-LABEL: test_many_callee_arguments:
; CHECK-NEWLOWERING:       // %bb.0:
; CHECK-NEWLOWERING-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
// 16-byte Folded Spill ; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill ; CHECK-NEWLOWERING-NEXT: mov x29, sp ; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16 ; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1 ; CHECK-NEWLOWERING-NEXT: mov x9, sp ; CHECK-NEWLOWERING-NEXT: msub x9, x8, x8, x9 ; CHECK-NEWLOWERING-NEXT: mov sp, x9 ; CHECK-NEWLOWERING-NEXT: ldp x10, x11, [x29, #32] ; CHECK-NEWLOWERING-NEXT: sub x12, x29, #16 ; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-16] ; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x12 ; CHECK-NEWLOWERING-NEXT: stp x10, x11, [sp, #-16]! ; CHECK-NEWLOWERING-NEXT: bl many_args_private_za_callee ; CHECK-NEWLOWERING-NEXT: add sp, sp, #16 ; CHECK-NEWLOWERING-NEXT: mov x8, x0 ; CHECK-NEWLOWERING-NEXT: smstart za ; CHECK-NEWLOWERING-NEXT: mrs x9, TPIDR2_EL0 ; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16 ; CHECK-NEWLOWERING-NEXT: cbnz x9, .LBB9_2 ; CHECK-NEWLOWERING-NEXT: // %bb.1: ; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore ; CHECK-NEWLOWERING-NEXT: .LBB9_2: ; CHECK-NEWLOWERING-NEXT: mov x0, x8 ; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr ; CHECK-NEWLOWERING-NEXT: mov sp, x29 ; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload ; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-NEWLOWERING-NEXT: ret i64 %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6, i64 %7, i64 %8, i64 %9 ) nounwind "aarch64_inout_za" { %ret = call i64 @many_args_private_za_callee( i64 %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6, i64 %7, i64 %8, i64 %9) ret i64 %ret }