; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -aarch64-new-sme-abi < %s | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-NEWLOWERING

declare void @private_za_call()
declare void @shared_za_call() "aarch64_inout_za"
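
; The tests below exercise the lazy ZA save emitted around calls to private-ZA
; functions. A rough sketch of the sequence the CHECK lines match (the local
; label is illustrative; see the AAPCS64 SME support routines):
;
;   msr TPIDR2_EL0, <addr of TPIDR2 block>  // arm the lazy save
;   bl  private_za_call
;   smstart za                              // re-enable ZA
;   mrs x8, TPIDR2_EL0
;   sub x0, x29, #16                        // x0 = TPIDR2 block, for restore
;   cbnz x8, 1f                             // nonzero: no callee saved ZA
;   bl  __arm_tpidr2_restore                // zero: ZA was saved; restore it
; 1:
;   msr TPIDR2_EL0, xzr                     // disarm the lazy save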

define void @private_za_loop(i32 %n) "aarch64_inout_za" nounwind {
; CHECK-LABEL: private_za_loop:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    mov x29, sp
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    rdsvl x8, #1
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    msub x9, x8, x8, x9
; CHECK-NEXT:    mov sp, x9
; CHECK-NEXT:    cmp w0, #1
; CHECK-NEXT:    stp x9, x8, [x29, #-16]
; CHECK-NEXT:    b.lt .LBB0_5
; CHECK-NEXT:  // %bb.1: // %loop.preheader
; CHECK-NEXT:    mov w19, w0
; CHECK-NEXT:    sub x20, x29, #16
; CHECK-NEXT:    b .LBB0_3
; CHECK-NEXT:  .LBB0_2: // %loop
; CHECK-NEXT:    // in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT:    subs w19, w19, #1
; CHECK-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEXT:    b.eq .LBB0_5
; CHECK-NEXT:  .LBB0_3: // %loop
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    msr TPIDR2_EL0, x20
; CHECK-NEXT:    bl private_za_call
; CHECK-NEXT:    smstart za
; CHECK-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEXT:    sub x0, x29, #16
; CHECK-NEXT:    cbnz x8, .LBB0_2
; CHECK-NEXT:  // %bb.4: // %loop
; CHECK-NEXT:    // in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEXT:    b .LBB0_2
; CHECK-NEXT:  .LBB0_5: // %exit
; CHECK-NEXT:    mov sp, x29
; CHECK-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT:    ret
;
; CHECK-NEWLOWERING-LABEL: private_za_loop:
; CHECK-NEWLOWERING:       // %bb.0: // %entry
; CHECK-NEWLOWERING-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT:    str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEWLOWERING-NEXT:    mov x29, sp
; CHECK-NEWLOWERING-NEXT:    sub sp, sp, #16
; CHECK-NEWLOWERING-NEXT:    rdsvl x8, #1
; CHECK-NEWLOWERING-NEXT:    mov x9, sp
; CHECK-NEWLOWERING-NEXT:    msub x9, x8, x8, x9
; CHECK-NEWLOWERING-NEXT:    mov sp, x9
; CHECK-NEWLOWERING-NEXT:    sub x10, x29, #16
; CHECK-NEWLOWERING-NEXT:    cmp w0, #1
; CHECK-NEWLOWERING-NEXT:    stp x9, x8, [x29, #-16]
; CHECK-NEWLOWERING-NEXT:    msr TPIDR2_EL0, x10
; CHECK-NEWLOWERING-NEXT:    b.lt .LBB0_3
; CHECK-NEWLOWERING-NEXT:  // %bb.1: // %loop.preheader
; CHECK-NEWLOWERING-NEXT:    mov w19, w0
; CHECK-NEWLOWERING-NEXT:  .LBB0_2: // %loop
; CHECK-NEWLOWERING-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEWLOWERING-NEXT:    bl private_za_call
; CHECK-NEWLOWERING-NEXT:    subs w19, w19, #1
; CHECK-NEWLOWERING-NEXT:    b.ne .LBB0_2
; CHECK-NEWLOWERING-NEXT:  .LBB0_3: // %exit
; CHECK-NEWLOWERING-NEXT:    smstart za
; CHECK-NEWLOWERING-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT:    sub x0, x29, #16
; CHECK-NEWLOWERING-NEXT:    cbnz x8, .LBB0_5
; CHECK-NEWLOWERING-NEXT:  // %bb.4: // %exit
; CHECK-NEWLOWERING-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEWLOWERING-NEXT:  .LBB0_5: // %exit
; CHECK-NEWLOWERING-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT:    mov sp, x29
; CHECK-NEWLOWERING-NEXT:    ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEWLOWERING-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT:    ret
entry:
  %cmpgt = icmp sgt i32 %n, 0
  br i1 %cmpgt, label %loop, label %exit

loop:
  %iv = phi i32 [ %next_iv, %loop ], [ 0, %entry ]
  tail call void @private_za_call()
  %next_iv = add nuw nsw i32 %iv, 1
  %cmpeq = icmp eq i32 %next_iv, %n
  br i1 %cmpeq, label %exit, label %loop

exit:
  ret void
}
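
; Note the contrast above: the current lowering arms and disarms TPIDR2_EL0 on
; every iteration of the loop, while -aarch64-new-sme-abi arms it once before
; the loop and emits a single conditional restore in the exit block.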

; FIXME: In the new lowering we could weight edges to avoid doing the lazy
; save in the loop.
define void @private_za_loop_active_entry_and_exit(i32 %n) "aarch64_inout_za" nounwind {
; CHECK-LABEL: private_za_loop_active_entry_and_exit:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    mov x29, sp
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    rdsvl x8, #1
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    mov w19, w0
; CHECK-NEXT:    msub x9, x8, x8, x9
; CHECK-NEXT:    mov sp, x9
; CHECK-NEXT:    stp x9, x8, [x29, #-16]
; CHECK-NEXT:    bl shared_za_call
; CHECK-NEXT:    cmp w19, #1
; CHECK-NEXT:    b.lt .LBB1_5
; CHECK-NEXT:  // %bb.1: // %loop.preheader
; CHECK-NEXT:    sub x20, x29, #16
; CHECK-NEXT:    b .LBB1_3
; CHECK-NEXT:  .LBB1_2: // %loop
; CHECK-NEXT:    // in Loop: Header=BB1_3 Depth=1
; CHECK-NEXT:    subs w19, w19, #1
; CHECK-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEXT:    b.eq .LBB1_5
; CHECK-NEXT:  .LBB1_3: // %loop
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    msr TPIDR2_EL0, x20
; CHECK-NEXT:    bl private_za_call
; CHECK-NEXT:    smstart za
; CHECK-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEXT:    sub x0, x29, #16
; CHECK-NEXT:    cbnz x8, .LBB1_2
; CHECK-NEXT:  // %bb.4: // %loop
; CHECK-NEXT:    // in Loop: Header=BB1_3 Depth=1
; CHECK-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEXT:    b .LBB1_2
; CHECK-NEXT:  .LBB1_5: // %exit
; CHECK-NEXT:    mov sp, x29
; CHECK-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT:    b shared_za_call
;
; CHECK-NEWLOWERING-LABEL: private_za_loop_active_entry_and_exit:
; CHECK-NEWLOWERING:       // %bb.0: // %entry
; CHECK-NEWLOWERING-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT:    mov x29, sp
; CHECK-NEWLOWERING-NEXT:    sub sp, sp, #16
; CHECK-NEWLOWERING-NEXT:    rdsvl x8, #1
; CHECK-NEWLOWERING-NEXT:    mov x9, sp
; CHECK-NEWLOWERING-NEXT:    msub x9, x8, x8, x9
; CHECK-NEWLOWERING-NEXT:    mov sp, x9
; CHECK-NEWLOWERING-NEXT:    mov w19, w0
; CHECK-NEWLOWERING-NEXT:    stp x9, x8, [x29, #-16]
; CHECK-NEWLOWERING-NEXT:    bl shared_za_call
; CHECK-NEWLOWERING-NEXT:    cmp w19, #1
; CHECK-NEWLOWERING-NEXT:    b.lt .LBB1_5
; CHECK-NEWLOWERING-NEXT:  // %bb.1: // %loop.preheader
; CHECK-NEWLOWERING-NEXT:    sub x20, x29, #16
; CHECK-NEWLOWERING-NEXT:    b .LBB1_3
; CHECK-NEWLOWERING-NEXT:  .LBB1_2: // %loop
; CHECK-NEWLOWERING-NEXT:    // in Loop: Header=BB1_3 Depth=1
; CHECK-NEWLOWERING-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT:    cbz w19, .LBB1_5
; CHECK-NEWLOWERING-NEXT:  .LBB1_3: // %loop
; CHECK-NEWLOWERING-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEWLOWERING-NEXT:    msr TPIDR2_EL0, x20
; CHECK-NEWLOWERING-NEXT:    bl private_za_call
; CHECK-NEWLOWERING-NEXT:    sub w19, w19, #1
; CHECK-NEWLOWERING-NEXT:    smstart za
; CHECK-NEWLOWERING-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT:    sub x0, x29, #16
; CHECK-NEWLOWERING-NEXT:    cbnz x8, .LBB1_2
; CHECK-NEWLOWERING-NEXT:  // %bb.4: // %loop
; CHECK-NEWLOWERING-NEXT:    // in Loop: Header=BB1_3 Depth=1
; CHECK-NEWLOWERING-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEWLOWERING-NEXT:    b .LBB1_2
; CHECK-NEWLOWERING-NEXT:  .LBB1_5: // %exit
; CHECK-NEWLOWERING-NEXT:    mov sp, x29
; CHECK-NEWLOWERING-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT:    b shared_za_call
entry:
  %cmpgt = icmp sgt i32 %n, 0
  tail call void @shared_za_call()
  br i1 %cmpgt, label %loop, label %exit

loop:
  %iv = phi i32 [ %next_iv, %loop ], [ 0, %entry ]
  tail call void @private_za_call()
  %next_iv = add nuw nsw i32 %iv, 1
  %cmpeq = icmp eq i32 %next_iv, %n
  br i1 %cmpeq, label %exit, label %loop

exit:
  tail call void @shared_za_call()
  ret void
}

define void @shared_za_loop(i32 %n) "aarch64_inout_za" nounwind {
; CHECK-COMMON-LABEL: shared_za_loop:
; CHECK-COMMON:       // %bb.0: // %entry
; CHECK-COMMON-NEXT:    cmp w0, #1
; CHECK-COMMON-NEXT:    b.lt .LBB2_4
; CHECK-COMMON-NEXT:  // %bb.1: // %loop.preheader
; CHECK-COMMON-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
; CHECK-COMMON-NEXT:    mov w19, w0
; CHECK-COMMON-NEXT:  .LBB2_2: // %loop
; CHECK-COMMON-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-COMMON-NEXT:    bl shared_za_call
; CHECK-COMMON-NEXT:    subs w19, w19, #1
; CHECK-COMMON-NEXT:    b.ne .LBB2_2
; CHECK-COMMON-NEXT:  // %bb.3:
; CHECK-COMMON-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; CHECK-COMMON-NEXT:  .LBB2_4: // %exit
; CHECK-COMMON-NEXT:    ret
entry:
  %cmpgt = icmp sgt i32 %n, 0
  br i1 %cmpgt, label %loop, label %exit

loop:
  %iv = phi i32 [ %next_iv, %loop ], [ 0, %entry ]
  tail call void @shared_za_call()
  %next_iv = add nuw nsw i32 %iv, 1
  %cmpeq = icmp eq i32 %next_iv, %n
  br i1 %cmpeq, label %exit, label %loop

exit:
  ret void
}

; FIXME: The codegen for this case could be improved (by tuning weights).
; Here the ZA save has been hoisted out of the conditional, but it would be
; better to sink it.
define void @cond_private_za_call(i1 %cond) "aarch64_inout_za" nounwind {
; CHECK-LABEL: cond_private_za_call:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT:    mov x29, sp
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    rdsvl x8, #1
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    msub x9, x8, x8, x9
; CHECK-NEXT:    mov sp, x9
; CHECK-NEXT:    stp x9, x8, [x29, #-16]
; CHECK-NEXT:    tbz w0, #0, .LBB3_4
; CHECK-NEXT:  // %bb.1: // %private_za_call
; CHECK-NEXT:    sub x8, x29, #16
; CHECK-NEXT:    msr TPIDR2_EL0, x8
; CHECK-NEXT:    bl private_za_call
; CHECK-NEXT:    smstart za
; CHECK-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEXT:    sub x0, x29, #16
; CHECK-NEXT:    cbnz x8, .LBB3_3
; CHECK-NEXT:  // %bb.2: // %private_za_call
; CHECK-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEXT:  .LBB3_3: // %private_za_call
; CHECK-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEXT:  .LBB3_4: // %exit
; CHECK-NEXT:    mov sp, x29
; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT:    b shared_za_call
;
; CHECK-NEWLOWERING-LABEL: cond_private_za_call:
; CHECK-NEWLOWERING:       // %bb.0:
; CHECK-NEWLOWERING-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT:    mov x29, sp
; CHECK-NEWLOWERING-NEXT:    sub sp, sp, #16
; CHECK-NEWLOWERING-NEXT:    rdsvl x8, #1
; CHECK-NEWLOWERING-NEXT:    mov x9, sp
; CHECK-NEWLOWERING-NEXT:    msub x9, x8, x8, x9
; CHECK-NEWLOWERING-NEXT:    mov sp, x9
; CHECK-NEWLOWERING-NEXT:    sub x10, x29, #16
; CHECK-NEWLOWERING-NEXT:    stp x9, x8, [x29, #-16]
; CHECK-NEWLOWERING-NEXT:    msr TPIDR2_EL0, x10
; CHECK-NEWLOWERING-NEXT:    tbz w0, #0, .LBB3_2
; CHECK-NEWLOWERING-NEXT:  // %bb.1: // %private_za_call
; CHECK-NEWLOWERING-NEXT:    bl private_za_call
; CHECK-NEWLOWERING-NEXT:  .LBB3_2: // %exit
; CHECK-NEWLOWERING-NEXT:    smstart za
; CHECK-NEWLOWERING-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT:    sub x0, x29, #16
; CHECK-NEWLOWERING-NEXT:    cbnz x8, .LBB3_4
; CHECK-NEWLOWERING-NEXT:  // %bb.3: // %exit
; CHECK-NEWLOWERING-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEWLOWERING-NEXT:  .LBB3_4: // %exit
; CHECK-NEWLOWERING-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT:    mov sp, x29
; CHECK-NEWLOWERING-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT:    b shared_za_call
  br i1 %cond, label %private_za_call, label %exit

private_za_call:
  tail call void @private_za_call()
  br label %exit

exit:
  tail call void @shared_za_call()
  ret void
}

define void @mixed_shared_private_za_loop(ptr %cond) "aarch64_inout_za" nounwind {
; CHECK-LABEL: mixed_shared_private_za_loop:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    mov x29, sp
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    rdsvl x8, #1
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    msub x9, x8, x8, x9
; CHECK-NEXT:    mov sp, x9
; CHECK-NEXT:    sub x20, x29, #16
; CHECK-NEXT:    stp x9, x8, [x29, #-16]
; CHECK-NEXT:    b .LBB4_2
; CHECK-NEXT:  .LBB4_1: // %loop
; CHECK-NEXT:    // in Loop: Header=BB4_2 Depth=1
; CHECK-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEXT:    ldrb w8, [x19]
; CHECK-NEXT:    tbz w8, #0, .LBB4_4
; CHECK-NEXT:  .LBB4_2: // %loop
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    bl shared_za_call
; CHECK-NEXT:    msr TPIDR2_EL0, x20
; CHECK-NEXT:    bl private_za_call
; CHECK-NEXT:    smstart za
; CHECK-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEXT:    sub x0, x29, #16
; CHECK-NEXT:    cbnz x8, .LBB4_1
; CHECK-NEXT:  // %bb.3: // %loop
; CHECK-NEXT:    // in Loop: Header=BB4_2 Depth=1
; CHECK-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEXT:    b .LBB4_1
; CHECK-NEXT:  .LBB4_4: // %exit
; CHECK-NEXT:    bl shared_za_call
; CHECK-NEXT:    mov sp, x29
; CHECK-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT:    ret
;
; CHECK-NEWLOWERING-LABEL: mixed_shared_private_za_loop:
; CHECK-NEWLOWERING:       // %bb.0:
; CHECK-NEWLOWERING-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT:    mov x29, sp
; CHECK-NEWLOWERING-NEXT:    sub sp, sp, #16
; CHECK-NEWLOWERING-NEXT:    rdsvl x8, #1
; CHECK-NEWLOWERING-NEXT:    mov x9, sp
; CHECK-NEWLOWERING-NEXT:    msub x9, x8, x8, x9
; CHECK-NEWLOWERING-NEXT:    mov sp, x9
; CHECK-NEWLOWERING-NEXT:    mov x19, x0
; CHECK-NEWLOWERING-NEXT:    sub x20, x29, #16
; CHECK-NEWLOWERING-NEXT:    stp x9, x8, [x29, #-16]
; CHECK-NEWLOWERING-NEXT:    b .LBB4_2
; CHECK-NEWLOWERING-NEXT:  .LBB4_1: // %loop
; CHECK-NEWLOWERING-NEXT:    // in Loop: Header=BB4_2 Depth=1
; CHECK-NEWLOWERING-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT:    tbz w8, #0, .LBB4_4
; CHECK-NEWLOWERING-NEXT:  .LBB4_2: // %loop
; CHECK-NEWLOWERING-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEWLOWERING-NEXT:    bl shared_za_call
; CHECK-NEWLOWERING-NEXT:    msr TPIDR2_EL0, x20
; CHECK-NEWLOWERING-NEXT:    bl private_za_call
; CHECK-NEWLOWERING-NEXT:    ldrb w8, [x19]
; CHECK-NEWLOWERING-NEXT:    smstart za
; CHECK-NEWLOWERING-NEXT:    mrs x9, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT:    sub x0, x29, #16
; CHECK-NEWLOWERING-NEXT:    cbnz x9, .LBB4_1
; CHECK-NEWLOWERING-NEXT:  // %bb.3: // %loop
; CHECK-NEWLOWERING-NEXT:    // in Loop: Header=BB4_2 Depth=1
; CHECK-NEWLOWERING-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEWLOWERING-NEXT:    b .LBB4_1
; CHECK-NEWLOWERING-NEXT:  .LBB4_4: // %exit
; CHECK-NEWLOWERING-NEXT:    bl shared_za_call
; CHECK-NEWLOWERING-NEXT:    mov sp, x29
; CHECK-NEWLOWERING-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT:    ret
  br label %loop

loop:
  call void @shared_za_call()
  call void @private_za_call()
  br label %latch

latch:
  %bool = load volatile i8, ptr %cond, align 1
  %trunc = trunc i8 %bool to i1
  br i1 %trunc, label %loop, label %exit

exit:
  call void @shared_za_call()
  ret void
}
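
; In the mixed loop above the lazy save cannot be hoisted: shared_za_call
; needs ZA live on entry to each iteration, so both lowerings arm and disarm
; TPIDR2_EL0 around the private-ZA call inside the loop.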

define void @cond_clobber_followed_by_clobber(i1 %cond) "aarch64_inout_za" nounwind {
; CHECK-LABEL: cond_clobber_followed_by_clobber:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT:    str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT:    mov x29, sp
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    rdsvl x8, #1
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    mov w19, w0
; CHECK-NEXT:    msub x9, x8, x8, x9
; CHECK-NEXT:    mov sp, x9
; CHECK-NEXT:    stp x9, x8, [x29, #-16]
; CHECK-NEXT:    bl shared_za_call
; CHECK-NEXT:    tbz w19, #0, .LBB5_4
; CHECK-NEXT:  // %bb.1: // %cond_clobber
; CHECK-NEXT:    sub x8, x29, #16
; CHECK-NEXT:    msr TPIDR2_EL0, x8
; CHECK-NEXT:    bl private_za_call
; CHECK-NEXT:    smstart za
; CHECK-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEXT:    sub x0, x29, #16
; CHECK-NEXT:    cbnz x8, .LBB5_3
; CHECK-NEXT:  // %bb.2: // %cond_clobber
; CHECK-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEXT:  .LBB5_3: // %cond_clobber
; CHECK-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEXT:  .LBB5_4: // %exit
; CHECK-NEXT:    sub x8, x29, #16
; CHECK-NEXT:    msr TPIDR2_EL0, x8
; CHECK-NEXT:    bl private_za_call
; CHECK-NEXT:    smstart za
; CHECK-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEXT:    sub x0, x29, #16
; CHECK-NEXT:    cbnz x8, .LBB5_6
; CHECK-NEXT:  // %bb.5: // %exit
; CHECK-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEXT:  .LBB5_6: // %exit
; CHECK-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEXT:    mov sp, x29
; CHECK-NEXT:    ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT:    b shared_za_call
;
; CHECK-NEWLOWERING-LABEL: cond_clobber_followed_by_clobber:
; CHECK-NEWLOWERING:       // %bb.0:
; CHECK-NEWLOWERING-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT:    str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEWLOWERING-NEXT:    mov x29, sp
; CHECK-NEWLOWERING-NEXT:    sub sp, sp, #16
; CHECK-NEWLOWERING-NEXT:    rdsvl x8, #1
; CHECK-NEWLOWERING-NEXT:    mov x9, sp
; CHECK-NEWLOWERING-NEXT:    msub x9, x8, x8, x9
; CHECK-NEWLOWERING-NEXT:    mov sp, x9
; CHECK-NEWLOWERING-NEXT:    mov w19, w0
; CHECK-NEWLOWERING-NEXT:    stp x9, x8, [x29, #-16]
; CHECK-NEWLOWERING-NEXT:    bl shared_za_call
; CHECK-NEWLOWERING-NEXT:    sub x8, x29, #16
; CHECK-NEWLOWERING-NEXT:    msr TPIDR2_EL0, x8
; CHECK-NEWLOWERING-NEXT:    tbz w19, #0, .LBB5_2
; CHECK-NEWLOWERING-NEXT:  // %bb.1: // %cond_clobber
; CHECK-NEWLOWERING-NEXT:    bl private_za_call
; CHECK-NEWLOWERING-NEXT:  .LBB5_2: // %exit
; CHECK-NEWLOWERING-NEXT:    bl private_za_call
; CHECK-NEWLOWERING-NEXT:    smstart za
; CHECK-NEWLOWERING-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT:    sub x0, x29, #16
; CHECK-NEWLOWERING-NEXT:    cbnz x8, .LBB5_4
; CHECK-NEWLOWERING-NEXT:  // %bb.3: // %exit
; CHECK-NEWLOWERING-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEWLOWERING-NEXT:  .LBB5_4: // %exit
; CHECK-NEWLOWERING-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT:    mov sp, x29
; CHECK-NEWLOWERING-NEXT:    ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEWLOWERING-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT:    b shared_za_call
  tail call void @shared_za_call()
  br i1 %cond, label %cond_clobber, label %exit

cond_clobber:
  tail call void @private_za_call()
  br label %exit

exit:
  tail call void @private_za_call()
  tail call void @shared_za_call()
  ret void
}
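
; For back-to-back clobbers the new lowering arms TPIDR2_EL0 once after the
; shared-ZA call and keeps it armed across both private-ZA calls, so only one
; conditional restore is emitted at the end.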

define void @conditionally_use_za(i1 %cond) "aarch64_inout_za" nounwind {
; CHECK-COMMON-LABEL: conditionally_use_za:
; CHECK-COMMON:       // %bb.0:
; CHECK-COMMON-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-COMMON-NEXT:    mov x29, sp
; CHECK-COMMON-NEXT:    sub sp, sp, #16
; CHECK-COMMON-NEXT:    rdsvl x8, #1
; CHECK-COMMON-NEXT:    mov x9, sp
; CHECK-COMMON-NEXT:    msub x9, x8, x8, x9
; CHECK-COMMON-NEXT:    mov sp, x9
; CHECK-COMMON-NEXT:    stp x9, x8, [x29, #-16]
; CHECK-COMMON-NEXT:    tbz w0, #0, .LBB6_4
; CHECK-COMMON-NEXT:  // %bb.1: // %use_za
; CHECK-COMMON-NEXT:    bl shared_za_call
; CHECK-COMMON-NEXT:    sub x8, x29, #16
; CHECK-COMMON-NEXT:    msr TPIDR2_EL0, x8
; CHECK-COMMON-NEXT:    bl private_za_call
; CHECK-COMMON-NEXT:    smstart za
; CHECK-COMMON-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-COMMON-NEXT:    sub x0, x29, #16
; CHECK-COMMON-NEXT:    cbnz x8, .LBB6_3
; CHECK-COMMON-NEXT:  // %bb.2: // %use_za
; CHECK-COMMON-NEXT:    bl __arm_tpidr2_restore
; CHECK-COMMON-NEXT:  .LBB6_3: // %use_za
; CHECK-COMMON-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-COMMON-NEXT:  .LBB6_4: // %exit
; CHECK-COMMON-NEXT:    mov sp, x29
; CHECK-COMMON-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-COMMON-NEXT:    ret
  br i1 %cond, label %use_za, label %exit

use_za:
  tail call void @shared_za_call()
  tail call void @private_za_call()
  br label %exit

exit:
  ret void
}

define void @diamond_mixed_za_merge_shared(i1 %cond) "aarch64_inout_za" nounwind {
; CHECK-COMMON-LABEL: diamond_mixed_za_merge_shared:
; CHECK-COMMON:       // %bb.0: // %entry
; CHECK-COMMON-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-COMMON-NEXT:    mov x29, sp
; CHECK-COMMON-NEXT:    sub sp, sp, #16
; CHECK-COMMON-NEXT:    rdsvl x8, #1
; CHECK-COMMON-NEXT:    mov x9, sp
; CHECK-COMMON-NEXT:    msub x9, x8, x8, x9
; CHECK-COMMON-NEXT:    mov sp, x9
; CHECK-COMMON-NEXT:    stp x9, x8, [x29, #-16]
; CHECK-COMMON-NEXT:    tbz w0, #0, .LBB7_2
; CHECK-COMMON-NEXT:  // %bb.1: // %then
; CHECK-COMMON-NEXT:    bl shared_za_call
; CHECK-COMMON-NEXT:    b .LBB7_5
; CHECK-COMMON-NEXT:  .LBB7_2: // %else
; CHECK-COMMON-NEXT:    sub x8, x29, #16
; CHECK-COMMON-NEXT:    msr TPIDR2_EL0, x8
; CHECK-COMMON-NEXT:    bl private_za_call
; CHECK-COMMON-NEXT:    smstart za
; CHECK-COMMON-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-COMMON-NEXT:    sub x0, x29, #16
; CHECK-COMMON-NEXT:    cbnz x8, .LBB7_4
; CHECK-COMMON-NEXT:  // %bb.3: // %else
; CHECK-COMMON-NEXT:    bl __arm_tpidr2_restore
; CHECK-COMMON-NEXT:  .LBB7_4: // %else
; CHECK-COMMON-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-COMMON-NEXT:  .LBB7_5: // %merge_shared
; CHECK-COMMON-NEXT:    bl shared_za_call
; CHECK-COMMON-NEXT:    mov sp, x29
; CHECK-COMMON-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-COMMON-NEXT:    ret
entry:
  br i1 %cond, label %then, label %else

then:
  call void @shared_za_call()
  br label %merge_shared

else:
  call void @private_za_call()
  br label %merge_shared

merge_shared:
  call void @shared_za_call()
  ret void
}

define void @diamond_mixed_za_merge_private(i1 %cond) "aarch64_inout_za" nounwind {
; CHECK-LABEL: diamond_mixed_za_merge_private:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT:    mov x29, sp
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    rdsvl x8, #1
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    msub x9, x8, x8, x9
; CHECK-NEXT:    mov sp, x9
; CHECK-NEXT:    stp x9, x8, [x29, #-16]
; CHECK-NEXT:    tbz w0, #0, .LBB8_2
; CHECK-NEXT:  // %bb.1: // %then
; CHECK-NEXT:    bl shared_za_call
; CHECK-NEXT:    b .LBB8_5
; CHECK-NEXT:  .LBB8_2: // %else
; CHECK-NEXT:    sub x8, x29, #16
; CHECK-NEXT:    msr TPIDR2_EL0, x8
; CHECK-NEXT:    bl private_za_call
; CHECK-NEXT:    smstart za
; CHECK-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEXT:    sub x0, x29, #16
; CHECK-NEXT:    cbnz x8, .LBB8_4
; CHECK-NEXT:  // %bb.3: // %else
; CHECK-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEXT:  .LBB8_4: // %else
; CHECK-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEXT:  .LBB8_5: // %merge_private_za
; CHECK-NEXT:    sub x8, x29, #16
; CHECK-NEXT:    msr TPIDR2_EL0, x8
; CHECK-NEXT:    bl private_za_call
; CHECK-NEXT:    smstart za
; CHECK-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEXT:    sub x0, x29, #16
; CHECK-NEXT:    cbnz x8, .LBB8_7
; CHECK-NEXT:  // %bb.6: // %merge_private_za
; CHECK-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEXT:  .LBB8_7: // %merge_private_za
; CHECK-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEXT:    mov sp, x29
; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT:    ret
;
; CHECK-NEWLOWERING-LABEL: diamond_mixed_za_merge_private:
; CHECK-NEWLOWERING:       // %bb.0: // %entry
; CHECK-NEWLOWERING-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT:    mov x29, sp
; CHECK-NEWLOWERING-NEXT:    sub sp, sp, #16
; CHECK-NEWLOWERING-NEXT:    rdsvl x8, #1
; CHECK-NEWLOWERING-NEXT:    mov x9, sp
; CHECK-NEWLOWERING-NEXT:    msub x9, x8, x8, x9
; CHECK-NEWLOWERING-NEXT:    mov sp, x9
; CHECK-NEWLOWERING-NEXT:    stp x9, x8, [x29, #-16]
; CHECK-NEWLOWERING-NEXT:    tbz w0, #0, .LBB8_2
; CHECK-NEWLOWERING-NEXT:  // %bb.1: // %then
; CHECK-NEWLOWERING-NEXT:    bl shared_za_call
; CHECK-NEWLOWERING-NEXT:    sub x8, x29, #16
; CHECK-NEWLOWERING-NEXT:    msr TPIDR2_EL0, x8
; CHECK-NEWLOWERING-NEXT:    b .LBB8_3
; CHECK-NEWLOWERING-NEXT:  .LBB8_2: // %else
; CHECK-NEWLOWERING-NEXT:    sub x8, x29, #16
; CHECK-NEWLOWERING-NEXT:    msr TPIDR2_EL0, x8
; CHECK-NEWLOWERING-NEXT:    bl private_za_call
; CHECK-NEWLOWERING-NEXT:  .LBB8_3: // %merge_private_za
; CHECK-NEWLOWERING-NEXT:    bl private_za_call
; CHECK-NEWLOWERING-NEXT:    smstart za
; CHECK-NEWLOWERING-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT:    sub x0, x29, #16
; CHECK-NEWLOWERING-NEXT:    cbnz x8, .LBB8_5
; CHECK-NEWLOWERING-NEXT:  // %bb.4: // %merge_private_za
; CHECK-NEWLOWERING-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEWLOWERING-NEXT:  .LBB8_5: // %merge_private_za
; CHECK-NEWLOWERING-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT:    mov sp, x29
; CHECK-NEWLOWERING-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT:    ret
entry:
  br i1 %cond, label %then, label %else

then:
  call void @shared_za_call()
  br label %merge_private_za

else:
  call void @private_za_call()
  br label %merge_private_za

merge_private_za:
  call void @private_za_call()
  ret void
}
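
; In the diamond above, both edges into %merge_private_za arm TPIDR2_EL0, so
; the new lowering can emit the restore sequence once at the merge point
; rather than duplicating it along each path.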

define void @critical_edge_mixed_za(i1 %c1, i1 %c2) "aarch64_inout_za" nounwind {
; CHECK-LABEL: critical_edge_mixed_za:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT:    str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT:    mov x29, sp
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    rdsvl x8, #1
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    mov w19, w1
; CHECK-NEXT:    msub x9, x8, x8, x9
; CHECK-NEXT:    mov sp, x9
; CHECK-NEXT:    stp x9, x8, [x29, #-16]
; CHECK-NEXT:    tbz w0, #0, .LBB9_5
; CHECK-NEXT:  // %bb.1: // %shared_path
; CHECK-NEXT:    bl shared_za_call
; CHECK-NEXT:    tbz w19, #0, .LBB9_8
; CHECK-NEXT:  .LBB9_2: // %exit_private
; CHECK-NEXT:    sub x8, x29, #16
; CHECK-NEXT:    msr TPIDR2_EL0, x8
; CHECK-NEXT:    bl private_za_call
; CHECK-NEXT:    smstart za
; CHECK-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEXT:    sub x0, x29, #16
; CHECK-NEXT:    cbnz x8, .LBB9_4
; CHECK-NEXT:  // %bb.3: // %exit_private
; CHECK-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEXT:  .LBB9_4: // %exit_private
; CHECK-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEXT:    b .LBB9_9
; CHECK-NEXT:  .LBB9_5: // %private_path
; CHECK-NEXT:    sub x8, x29, #16
; CHECK-NEXT:    msr TPIDR2_EL0, x8
; CHECK-NEXT:    bl private_za_call
; CHECK-NEXT:    smstart za
; CHECK-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEXT:    sub x0, x29, #16
; CHECK-NEXT:    cbnz x8, .LBB9_7
; CHECK-NEXT:  // %bb.6: // %private_path
; CHECK-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEXT:  .LBB9_7: // %private_path
; CHECK-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEXT:    tbnz w19, #0, .LBB9_2
; CHECK-NEXT:  .LBB9_8: // %exit_shared
; CHECK-NEXT:    bl shared_za_call
; CHECK-NEXT:  .LBB9_9: // %common.ret
; CHECK-NEXT:    mov sp, x29
; CHECK-NEXT:    ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT:    ret
;
; CHECK-NEWLOWERING-LABEL: critical_edge_mixed_za:
; CHECK-NEWLOWERING:       // %bb.0: // %entry
; CHECK-NEWLOWERING-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT:    str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEWLOWERING-NEXT:    mov x29, sp
; CHECK-NEWLOWERING-NEXT:    sub sp, sp, #16
; CHECK-NEWLOWERING-NEXT:    rdsvl x8, #1
; CHECK-NEWLOWERING-NEXT:    mov x9, sp
; CHECK-NEWLOWERING-NEXT:    msub x9, x8, x8, x9
; CHECK-NEWLOWERING-NEXT:    mov sp, x9
; CHECK-NEWLOWERING-NEXT:    mov w19, w1
; CHECK-NEWLOWERING-NEXT:    stp x9, x8, [x29, #-16]
; CHECK-NEWLOWERING-NEXT:    tbz w0, #0, .LBB9_5
; CHECK-NEWLOWERING-NEXT:  // %bb.1: // %shared_path
; CHECK-NEWLOWERING-NEXT:    bl shared_za_call
; CHECK-NEWLOWERING-NEXT:    tbz w19, #0, .LBB9_8
; CHECK-NEWLOWERING-NEXT:  .LBB9_2: // %exit_private
; CHECK-NEWLOWERING-NEXT:    sub x8, x29, #16
; CHECK-NEWLOWERING-NEXT:    msr TPIDR2_EL0, x8
; CHECK-NEWLOWERING-NEXT:    bl private_za_call
; CHECK-NEWLOWERING-NEXT:    smstart za
; CHECK-NEWLOWERING-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT:    sub x0, x29, #16
; CHECK-NEWLOWERING-NEXT:    cbnz x8, .LBB9_4
; CHECK-NEWLOWERING-NEXT:  // %bb.3: // %exit_private
; CHECK-NEWLOWERING-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEWLOWERING-NEXT:  .LBB9_4: // %exit_private
; CHECK-NEWLOWERING-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT:    b .LBB9_9
; CHECK-NEWLOWERING-NEXT:  .LBB9_5: // %private_path
; CHECK-NEWLOWERING-NEXT:    sub x8, x29, #16
; CHECK-NEWLOWERING-NEXT:    msr TPIDR2_EL0, x8
; CHECK-NEWLOWERING-NEXT:    bl private_za_call
; CHECK-NEWLOWERING-NEXT:    smstart za
; CHECK-NEWLOWERING-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT:    sub x0, x29, #16
; CHECK-NEWLOWERING-NEXT:    cbnz x8, .LBB9_7
; CHECK-NEWLOWERING-NEXT:  // %bb.6: // %private_path
; CHECK-NEWLOWERING-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEWLOWERING-NEXT:  .LBB9_7: // %private_path
; CHECK-NEWLOWERING-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT:    tbnz w19, #0, .LBB9_2
; CHECK-NEWLOWERING-NEXT:  .LBB9_8: // %exit_shared
; CHECK-NEWLOWERING-NEXT:    bl shared_za_call
; CHECK-NEWLOWERING-NEXT:  .LBB9_9: // %common.ret
; CHECK-NEWLOWERING-NEXT:    mov sp, x29
; CHECK-NEWLOWERING-NEXT:    ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEWLOWERING-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT:    ret
entry:
  br i1 %c1, label %shared_path, label %private_path

shared_path:
  call void @shared_za_call()
  br label %merge

private_path:
  call void @private_za_call()
  br label %merge

merge:
  br i1 %c2, label %exit_private, label %exit_shared

exit_private:
  call void @private_za_call()
  ret void

exit_shared:
  call void @shared_za_call()
  ret void
}

define void @nested_cond_in_loop(i32 %n, i1 %cond) "aarch64_inout_za" nounwind {
; CHECK-COMMON-LABEL: nested_cond_in_loop:
; CHECK-COMMON:       // %bb.0: // %entry
; CHECK-COMMON-NEXT:    stp x29, x30, [sp, #-48]! // 16-byte Folded Spill
; CHECK-COMMON-NEXT:    stp x22, x21, [sp, #16] // 16-byte Folded Spill
; CHECK-COMMON-NEXT:    mov x29, sp
; CHECK-COMMON-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
; CHECK-COMMON-NEXT:    sub sp, sp, #16
; CHECK-COMMON-NEXT:    rdsvl x8, #1
; CHECK-COMMON-NEXT:    mov x9, sp
; CHECK-COMMON-NEXT:    msub x9, x8, x8, x9
; CHECK-COMMON-NEXT:    mov sp, x9
; CHECK-COMMON-NEXT:    cmp w0, #1
; CHECK-COMMON-NEXT:    stp x9, x8, [x29, #-16]
; CHECK-COMMON-NEXT:    b.lt .LBB10_8
; CHECK-COMMON-NEXT:  // %bb.1: // %loop.preheader
; CHECK-COMMON-NEXT:    mov w19, w1
; CHECK-COMMON-NEXT:    mov w20, w0
; CHECK-COMMON-NEXT:    mov w21, wzr
; CHECK-COMMON-NEXT:    sub x22, x29, #16
; CHECK-COMMON-NEXT:    b .LBB10_4
; CHECK-COMMON-NEXT:  .LBB10_2: // %use_shared
; CHECK-COMMON-NEXT:    // in Loop: Header=BB10_4 Depth=1
; CHECK-COMMON-NEXT:    bl shared_za_call
; CHECK-COMMON-NEXT:  .LBB10_3: // %latch
; CHECK-COMMON-NEXT:    // in Loop: Header=BB10_4 Depth=1
; CHECK-COMMON-NEXT:    add w21, w21, #1
; CHECK-COMMON-NEXT:    cmp w21, w20
; CHECK-COMMON-NEXT:    b.ge .LBB10_8
; CHECK-COMMON-NEXT:  .LBB10_4: // %loop
; CHECK-COMMON-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-COMMON-NEXT:    tbnz w19, #0, .LBB10_2
; CHECK-COMMON-NEXT:  // %bb.5: // %use_private
; CHECK-COMMON-NEXT:    // in Loop: Header=BB10_4 Depth=1
; CHECK-COMMON-NEXT:    msr TPIDR2_EL0, x22
; CHECK-COMMON-NEXT:    bl private_za_call
; CHECK-COMMON-NEXT:    smstart za
; CHECK-COMMON-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-COMMON-NEXT:    sub x0, x29, #16
; CHECK-COMMON-NEXT:    cbnz x8, .LBB10_7
; CHECK-COMMON-NEXT:  // %bb.6: // %use_private
; CHECK-COMMON-NEXT:    // in Loop: Header=BB10_4 Depth=1
; CHECK-COMMON-NEXT:    bl __arm_tpidr2_restore
; CHECK-COMMON-NEXT:  .LBB10_7: // %use_private
; CHECK-COMMON-NEXT:    // in Loop: Header=BB10_4 Depth=1
; CHECK-COMMON-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-COMMON-NEXT:    b .LBB10_3
; CHECK-COMMON-NEXT:  .LBB10_8: // %exit
; CHECK-COMMON-NEXT:    mov sp, x29
; CHECK-COMMON-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-COMMON-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
; CHECK-COMMON-NEXT:    ldp x29, x30, [sp], #48 // 16-byte Folded Reload
; CHECK-COMMON-NEXT:    ret
entry:
  %cmp = icmp sgt i32 %n, 0
  br i1 %cmp, label %loop, label %exit

loop:
  %iv = phi i32 [ 0, %entry ], [ %inc, %latch ]
  br i1 %cond, label %use_shared, label %use_private

use_shared:
  call void @shared_za_call()
  br label %latch

use_private:
  call void @private_za_call()
  br label %latch

latch:
  %inc = add i32 %iv, 1
  %cmp2 = icmp slt i32 %inc, %n
  br i1 %cmp2, label %loop, label %exit

exit:
  ret void
}

define void @loop_with_external_entry(i1 %c1, i1 %c2) "aarch64_inout_za" nounwind {
; CHECK-LABEL: loop_with_external_entry:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    mov x29, sp
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    rdsvl x8, #1
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    mov w19, w1
; CHECK-NEXT:    msub x9, x8, x8, x9
; CHECK-NEXT:    mov sp, x9
; CHECK-NEXT:    stp x9, x8, [x29, #-16]
; CHECK-NEXT:    tbz w0, #0, .LBB11_2
; CHECK-NEXT:  // %bb.1: // %init
; CHECK-NEXT:    bl shared_za_call
; CHECK-NEXT:  .LBB11_2: // %loop.preheader
; CHECK-NEXT:    sub x20, x29, #16
; CHECK-NEXT:    b .LBB11_4
; CHECK-NEXT:  .LBB11_3: // %loop
; CHECK-NEXT:    // in Loop: Header=BB11_4 Depth=1
; CHECK-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEXT:    tbz w19, #0, .LBB11_6
; CHECK-NEXT:  .LBB11_4: // %loop
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    msr TPIDR2_EL0, x20
; CHECK-NEXT:    bl private_za_call
; CHECK-NEXT:    smstart za
; CHECK-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEXT:    sub x0, x29, #16
; CHECK-NEXT:    cbnz x8, .LBB11_3
; CHECK-NEXT:  // %bb.5: // %loop
; CHECK-NEXT:    // in Loop: Header=BB11_4 Depth=1
; CHECK-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEXT:    b .LBB11_3
; CHECK-NEXT:  .LBB11_6: // %exit
; CHECK-NEXT:    mov sp, x29
; CHECK-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT:    ret
;
; CHECK-NEWLOWERING-LABEL: loop_with_external_entry:
; CHECK-NEWLOWERING:       // %bb.0: // %entry
; CHECK-NEWLOWERING-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT:    str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEWLOWERING-NEXT:    mov x29, sp
; CHECK-NEWLOWERING-NEXT:    sub sp, sp, #16
; CHECK-NEWLOWERING-NEXT:    rdsvl x8, #1
; CHECK-NEWLOWERING-NEXT:    mov x9, sp
; CHECK-NEWLOWERING-NEXT:    msub x9, x8, x8, x9
; CHECK-NEWLOWERING-NEXT:    mov sp, x9
; CHECK-NEWLOWERING-NEXT:    mov w19, w1
; CHECK-NEWLOWERING-NEXT:    stp x9, x8, [x29, #-16]
; CHECK-NEWLOWERING-NEXT:    tbz w0, #0, .LBB11_2
; CHECK-NEWLOWERING-NEXT:  // %bb.1: // %init
; CHECK-NEWLOWERING-NEXT:    bl shared_za_call
; CHECK-NEWLOWERING-NEXT:  .LBB11_2: // %loop.preheader
; CHECK-NEWLOWERING-NEXT:    sub x8, x29, #16
; CHECK-NEWLOWERING-NEXT:    msr TPIDR2_EL0, x8
; CHECK-NEWLOWERING-NEXT:  .LBB11_3: // %loop
; CHECK-NEWLOWERING-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEWLOWERING-NEXT:    bl private_za_call
; CHECK-NEWLOWERING-NEXT:    tbnz w19, #0, .LBB11_3
; CHECK-NEWLOWERING-NEXT:  // %bb.4: // %exit
; CHECK-NEWLOWERING-NEXT:    smstart za
; CHECK-NEWLOWERING-NEXT:    mrs x8, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT:    sub x0, x29, #16
; CHECK-NEWLOWERING-NEXT:    cbnz x8, .LBB11_6
; CHECK-NEWLOWERING-NEXT:  // %bb.5: // %exit
; CHECK-NEWLOWERING-NEXT:    bl __arm_tpidr2_restore
; CHECK-NEWLOWERING-NEXT:  .LBB11_6: // %exit
; CHECK-NEWLOWERING-NEXT:    msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT:    mov sp, x29
; CHECK-NEWLOWERING-NEXT:    ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEWLOWERING-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT:    ret
entry:
  br i1 %c1, label %init, label %loop

init:
  call void @shared_za_call()
  br label %loop

loop:
  call void @private_za_call()
  br i1 %c2, label %loop, label %exit

exit:
  ret void
}
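
; Since the loop above contains no shared-ZA calls, the new lowering sinks the
; whole save/restore sequence out of it: TPIDR2_EL0 is armed once in the
; preheader and the conditional restore runs once in the exit block.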