; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -start-after=simplifycfg -enable-tail-merge=false -verify-machineinstrs < %s | FileCheck %s

; Tests AArch64 SME2 code generation for ZT0 (and ZA) state handling around
; indirect calls, for different combinations of SME state attributes on the
; calling function and on the call site.

;
; Private-ZA Callee
;

; Caller: "aarch64_in_zt0". Call site: no SME state attributes.
; Expect spill & fill of ZT0 around call (str/ldr zt0 through x19, which
; points at the bottom of the 80-byte frame).
; Expect smstop/smstart za around call
define void @zt0_in_caller_no_state_callee(ptr %callee) "aarch64_in_zt0" nounwind {
; CHECK-LABEL: zt0_in_caller_no_state_callee:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #80
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: mov x19, sp
; CHECK-NEXT: str zt0, [x19]
; CHECK-NEXT: smstop za
; CHECK-NEXT: blr x0
; CHECK-NEXT: smstart za
; CHECK-NEXT: ldr zt0, [x19]
; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #80
; CHECK-NEXT: ret
  call void %callee();
  ret void;
}

; Caller: "aarch64_inout_za" + "aarch64_in_zt0". Call site: no SME state
; attributes.
; Expect spill & fill of ZT0 around call
; Expect setup and restore lazy-save around call: a stack address (x29 - 16)
; is written to TPIDR2_EL0 before the call; after the call,
; __arm_tpidr2_restore is called only when TPIDR2_EL0 reads back as zero, and
; TPIDR2_EL0 is then cleared.
; Expect smstart za after call (no smstop za is expected before it)
define void @za_zt0_shared_caller_no_state_callee(ptr %callee) "aarch64_inout_za" "aarch64_in_zt0" nounwind {
; CHECK-LABEL: za_zt0_shared_caller_no_state_callee:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #80
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: stur x9, [x29, #-16]
; CHECK-NEXT: sub x9, x29, #16
; CHECK-NEXT: sub x19, x29, #80
; CHECK-NEXT: sturh wzr, [x29, #-6]
; CHECK-NEXT: stur wzr, [x29, #-4]
; CHECK-NEXT: sturh w8, [x29, #-8]
; CHECK-NEXT: msr TPIDR2_EL0, x9
; CHECK-NEXT: str zt0, [x19]
; CHECK-NEXT: blr x0
; CHECK-NEXT: smstart za
; CHECK-NEXT: ldr zt0, [x19]
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
; CHECK-NEXT: cbnz x8, .LBB1_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: ret
  call void %callee();
  ret void;
}

;
; Shared-ZA Callee
;

; Caller: "aarch64_in_zt0". Call site: "aarch64_in_zt0".
; Caller and callee have shared ZT0 state, no spill/fill of ZT0 required
; (only the link register is saved and restored around the call).
define void @zt0_shared_caller_zt0_shared_callee(ptr %callee) "aarch64_in_zt0" nounwind {
; CHECK-LABEL: zt0_shared_caller_zt0_shared_callee:
; CHECK: // %bb.0:
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: blr x0
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  call void %callee() "aarch64_in_zt0";
  ret void;
}

; Caller: "aarch64_inout_za" + "aarch64_in_zt0". Call site: "aarch64_inout_za"
; only.
; Expect spill & fill of ZT0 around call
; No smstop/smstart za and no lazy-save setup is expected around the call.
define void @za_zt0_shared_caller_za_shared_callee(ptr %callee) "aarch64_inout_za" "aarch64_in_zt0" nounwind {
; CHECK-LABEL: za_zt0_shared_caller_za_shared_callee:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #80
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: mov x19, sp
; CHECK-NEXT: str zt0, [x19]
; CHECK-NEXT: blr x0
; CHECK-NEXT: ldr zt0, [x19]
; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #80
; CHECK-NEXT: ret
  call void %callee() "aarch64_inout_za";
  ret void;
}

; Caller: "aarch64_inout_za" + "aarch64_in_zt0". Call site: the same two
; attributes.
; Caller and callee have shared ZA & ZT0, so no spill/fill of ZT0 and no ZA
; state change is expected around the call.
define void @za_zt0_shared_caller_za_zt0_shared_callee(ptr %callee) "aarch64_inout_za" "aarch64_in_zt0" nounwind {
; CHECK-LABEL: za_zt0_shared_caller_za_zt0_shared_callee:
; CHECK: // %bb.0:
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: blr x0
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  call void %callee() "aarch64_inout_za" "aarch64_in_zt0";
  ret void;
}

;
; New-ZT0 Callee
;

; Caller: "aarch64_in_zt0". Call site: "aarch64_new_zt0".
; Expect spill & fill of ZT0 around call
; Expect smstop/smstart za around call
define void @zt0_in_caller_zt0_new_callee(ptr %callee) "aarch64_in_zt0" nounwind {
; CHECK-LABEL: zt0_in_caller_zt0_new_callee:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #80
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: mov x19, sp
; CHECK-NEXT: str zt0, [x19]
; CHECK-NEXT: smstop za
; CHECK-NEXT: blr x0
; CHECK-NEXT: smstart za
; CHECK-NEXT: ldr zt0, [x19]
; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #80
; CHECK-NEXT: ret
  call void %callee() "aarch64_new_zt0";
  ret void;
}

; Caller: "aarch64_new_zt0". Call site: "aarch64_new_zt0".
; Expect commit of lazy-save if ZA is dormant (__arm_tpidr2_save is called
; only when TPIDR2_EL0 is non-zero, after which TPIDR2_EL0 is cleared)
; Expect smstart ZA & clear ZT0
; Expect spill & fill of ZT0 around call, with smstop/smstart za around it
; Before return, expect smstop ZA
define void @zt0_new_caller_zt0_new_callee(ptr %callee) "aarch64_new_zt0" nounwind {
; CHECK-LABEL: zt0_new_caller_zt0_new_callee:
; CHECK: // %bb.0: // %prelude
; CHECK-NEXT: sub sp, sp, #80
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: cbz x8, .LBB6_2
; CHECK-NEXT: // %bb.1: // %save.za
; CHECK-NEXT: bl __arm_tpidr2_save
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: .LBB6_2:
; CHECK-NEXT: smstart za
; CHECK-NEXT: zero { zt0 }
; CHECK-NEXT: mov x19, sp
; CHECK-NEXT: str zt0, [x19]
; CHECK-NEXT: smstop za
; CHECK-NEXT: blr x0
; CHECK-NEXT: smstart za
; CHECK-NEXT: ldr zt0, [x19]
; CHECK-NEXT: smstop za
; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #80
; CHECK-NEXT: ret
  call void %callee() "aarch64_new_zt0";
  ret void;
}

; Caller: "aarch64_new_zt0". Callee: the directly-called routine
; @__arm_sme_state; the first element of its {i64, i64} result is returned.
; Expect commit of lazy-save if ZA is dormant
; Expect smstart ZA & clear ZT0
; No spill & fill of ZT0 around __arm_tpidr2_save
; Expect spill & fill of ZT0 around __arm_sme_state call (without
; smstop/smstart za around that call)
; Before return, expect smstop ZA
define i64 @zt0_new_caller_abi_routine_callee() "aarch64_new_zt0" nounwind {
; CHECK-LABEL: zt0_new_caller_abi_routine_callee:
; CHECK: // %bb.0: // %prelude
; CHECK-NEXT: sub sp, sp, #80
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: cbz x8, .LBB7_2
; CHECK-NEXT: // %bb.1: // %save.za
; CHECK-NEXT: bl __arm_tpidr2_save
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: .LBB7_2:
; CHECK-NEXT: smstart za
; CHECK-NEXT: zero { zt0 }
; CHECK-NEXT: mov x19, sp
; CHECK-NEXT: str zt0, [x19]
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: ldr zt0, [x19]
; CHECK-NEXT: smstop za
; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #80
; CHECK-NEXT: ret
  %res = call {i64, i64} @__arm_sme_state()
  %res.0 = extractvalue {i64, i64} %res, 0
  ret i64 %res.0
}

; External routine called by @zt0_new_caller_abi_routine_callee.
declare {i64, i64} @__arm_sme_state()

;
; New-ZA / New-ZT0 Caller
;

; Caller: "aarch64_new_zt0". Call site: "aarch64_in_zt0".
; Expect commit of lazy-save if ZA is dormant
; Expect smstart ZA & clear ZT0
; No spill & fill of ZT0 is expected around the call.
; Before return, expect smstop ZA
define void @zt0_new_caller(ptr %callee) "aarch64_new_zt0" nounwind {
; CHECK-LABEL: zt0_new_caller:
; CHECK: // %bb.0: // %prelude
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: cbz x8, .LBB8_2
; CHECK-NEXT: // %bb.1: // %save.za
; CHECK-NEXT: bl __arm_tpidr2_save
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: .LBB8_2:
; CHECK-NEXT: smstart za
; CHECK-NEXT: zero { zt0 }
; CHECK-NEXT: blr x0
; CHECK-NEXT: smstop za
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  call void %callee() "aarch64_in_zt0";
  ret void;
}

; Caller: "aarch64_new_za" + "aarch64_new_zt0". Call site: "aarch64_inout_za"
; + "aarch64_in_zt0".
; Expect commit of lazy-save if ZA is dormant
; Expect smstart ZA, clear ZA & clear ZT0
; Before return, expect smstop ZA
define void @new_za_zt0_caller(ptr %callee) "aarch64_new_za" "aarch64_new_zt0" nounwind {
; CHECK-LABEL: new_za_zt0_caller:
; CHECK: // %bb.0: // %prelude
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: cbz x8, .LBB9_2
; CHECK-NEXT: // %bb.1: // %save.za
; CHECK-NEXT: bl __arm_tpidr2_save
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: .LBB9_2:
; CHECK-NEXT: smstart za
; CHECK-NEXT: zero {za}
; CHECK-NEXT: zero { zt0 }
; CHECK-NEXT: blr x0
; CHECK-NEXT: smstop za
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  call void %callee() "aarch64_inout_za" "aarch64_in_zt0";
  ret void;
}

; Caller: "aarch64_new_za" + "aarch64_in_zt0". Call site: "aarch64_inout_za"
; + "aarch64_in_zt0".
; Expect clear ZA on entry
; No lazy-save commit and no smstart/smstop za is expected in this function.
define void @new_za_shared_zt0_caller(ptr %callee) "aarch64_new_za" "aarch64_in_zt0" nounwind {
; CHECK-LABEL: new_za_shared_zt0_caller:
; CHECK: // %bb.0:
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: zero {za}
; CHECK-NEXT: blr x0
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  call void %callee() "aarch64_inout_za" "aarch64_in_zt0";
  ret void;
}

; Caller: "aarch64_inout_za" + "aarch64_new_zt0". Call site:
; "aarch64_inout_za" + "aarch64_in_zt0".
; Expect clear ZT0 on entry
; No lazy-save commit and no smstart/smstop za is expected in this function.
define void @shared_za_new_zt0(ptr %callee) "aarch64_inout_za" "aarch64_new_zt0" nounwind {
; CHECK-LABEL: shared_za_new_zt0:
; CHECK: // %bb.0:
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: zero { zt0 }
; CHECK-NEXT: blr x0
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  call void %callee() "aarch64_inout_za" "aarch64_in_zt0";
  ret void;
}