 llvm/lib/Target/AArch64/AArch64FrameLowering.cpp  |  24
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp   |  19
 llvm/test/CodeGen/AArch64/sme-darwin-no-sve-vg.ll | 161
 3 files changed, 189 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index bf0eb14..429affe 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1394,6 +1394,18 @@ bool requiresGetVGCall(MachineFunction &MF) {
          !MF.getSubtarget<AArch64Subtarget>().hasSVE();
 }
 
+static bool requiresSaveVG(MachineFunction &MF) {
+  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+  // For Darwin platforms we don't save VG for non-SVE functions, even if SME
+  // is enabled with streaming mode changes.
+  if (!AFI->hasStreamingModeChanges())
+    return false;
+  auto &ST = MF.getSubtarget<AArch64Subtarget>();
+  if (ST.isTargetDarwin())
+    return ST.hasSVE();
+  return true;
+}
+
 bool isVGInstruction(MachineBasicBlock::iterator MBBI) {
   unsigned Opc = MBBI->getOpcode();
   if (Opc == AArch64::CNTD_XPiI || Opc == AArch64::RDSVLI_XI ||
@@ -1430,8 +1442,7 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
   // functions, we need to do this for both the streaming and non-streaming
   // vector length. Move past these instructions if necessary.
   MachineFunction &MF = *MBB.getParent();
-  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
-  if (AFI->hasStreamingModeChanges())
+  if (requiresSaveVG(MF))
     while (isVGInstruction(MBBI))
       ++MBBI;
 
@@ -1938,7 +1949,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
   while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup) &&
          !IsSVECalleeSave(MBBI)) {
     // Move past instructions generated to calculate VG
-    if (AFI->hasStreamingModeChanges())
+    if (requiresSaveVG(MF))
       while (isVGInstruction(MBBI))
         ++MBBI;
 
@@ -3754,7 +3765,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
   // non-streaming VG value.
   const Function &F = MF.getFunction();
   SMEAttrs Attrs(F);
-  if (AFI->hasStreamingModeChanges()) {
+  if (requiresSaveVG(MF)) {
     if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface())
       CSStackSize += 16;
     else
@@ -3907,7 +3918,7 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
   }
 
   // Insert VG into the list of CSRs, immediately before LR if saved.
-  if (AFI->hasStreamingModeChanges()) {
+  if (requiresSaveVG(MF)) {
     std::vector<CalleeSavedInfo> VGSaves;
     SMEAttrs Attrs(MF.getFunction());
 
@@ -4636,10 +4647,9 @@ MachineBasicBlock::iterator emitVGSaveRestore(MachineBasicBlock::iterator II,
 
 void AArch64FrameLowering::processFunctionBeforeFrameIndicesReplaced(
     MachineFunction &MF, RegScavenger *RS = nullptr) const {
-  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
   for (auto &BB : MF)
     for (MachineBasicBlock::iterator II = BB.begin(); II != BB.end();) {
-      if (AFI->hasStreamingModeChanges())
+      if (requiresSaveVG(MF))
         II = emitVGSaveRestore(II, this);
       if (StackTaggingMergeSetTag)
         II = tryMergeAdjacentSTG(II, this, RS);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index efc1703..bb7aea2 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8762,10 +8762,11 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
 
   SDValue InGlue;
   if (RequiresSMChange) {
-
-    Chain = DAG.getNode(AArch64ISD::VG_SAVE, DL,
-                        DAG.getVTList(MVT::Other, MVT::Glue), Chain);
-    InGlue = Chain.getValue(1);
+    if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) {
+      Chain = DAG.getNode(AArch64ISD::VG_SAVE, DL,
+                          DAG.getVTList(MVT::Other, MVT::Glue), Chain);
+      InGlue = Chain.getValue(1);
+    }
 
     SDValue NewChain = changeStreamingMode(
         DAG, DL, CalleeAttrs.hasStreamingInterface(), Chain, InGlue,
@@ -8944,11 +8945,13 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
     Result = changeStreamingMode(
         DAG, DL, !CalleeAttrs.hasStreamingInterface(), Result, InGlue,
         getSMCondition(CallerAttrs, CalleeAttrs), PStateSM);
-    InGlue = Result.getValue(1);
 
-    Result =
-        DAG.getNode(AArch64ISD::VG_RESTORE, DL,
-                    DAG.getVTList(MVT::Other, MVT::Glue), {Result, InGlue});
+    if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) {
+      InGlue = Result.getValue(1);
+      Result =
+          DAG.getNode(AArch64ISD::VG_RESTORE, DL,
+                      DAG.getVTList(MVT::Other, MVT::Glue), {Result, InGlue});
+    }
   }
 
   if (CallerAttrs.requiresEnablingZAAfterCall(CalleeAttrs))
diff --git a/llvm/test/CodeGen/AArch64/sme-darwin-no-sve-vg.ll b/llvm/test/CodeGen/AArch64/sme-darwin-no-sve-vg.ll
new file mode 100644
index 0000000..36a300f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-darwin-no-sve-vg.ll
@@ -0,0 +1,161 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -o - %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "arm64-apple-macosx14.0.0"
+
+; Check we don't crash on Darwin and that we don't try to save VG
+; when only SME (and not SVE) is enabled.
+
+; Function Attrs: mustprogress norecurse nounwind ssp uwtable(sync)
+define noundef i32 @main() local_unnamed_addr #0 {
+; CHECK-LABEL: main:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    stp d15, d14, [sp, #-80]! ; 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-NEXT:    stp d13, d12, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp d11, d10, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp d9, d8, [sp, #48] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #64] ; 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset b8, -24
+; CHECK-NEXT:    .cfi_offset b9, -32
+; CHECK-NEXT:    .cfi_offset b10, -40
+; CHECK-NEXT:    .cfi_offset b11, -48
+; CHECK-NEXT:    .cfi_offset b12, -56
+; CHECK-NEXT:    .cfi_offset b13, -64
+; CHECK-NEXT:    .cfi_offset b14, -72
+; CHECK-NEXT:    .cfi_offset b15, -80
+; CHECK-NEXT:    smstart sm
+; CHECK-NEXT:    bl __ZL9sme_crashv
+; CHECK-NEXT:    smstop sm
+; CHECK-NEXT:    mov w0, #0 ; =0x0
+; CHECK-NEXT:    ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d9, d8, [sp, #48] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d11, d10, [sp, #32] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d13, d12, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d15, d14, [sp], #80 ; 16-byte Folded Reload
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    .cfi_restore w30
+; CHECK-NEXT:    .cfi_restore w29
+; CHECK-NEXT:    .cfi_restore b8
+; CHECK-NEXT:    .cfi_restore b9
+; CHECK-NEXT:    .cfi_restore b10
+; CHECK-NEXT:    .cfi_restore b11
+; CHECK-NEXT:    .cfi_restore b12
+; CHECK-NEXT:    .cfi_restore b13
+; CHECK-NEXT:    .cfi_restore b14
+; CHECK-NEXT:    .cfi_restore b15
+; CHECK-NEXT:    ret
+entry:
+  tail call fastcc void @_ZL9sme_crashv() #4
+  ret i32 0
+}
+
+; Function Attrs: mustprogress norecurse nounwind ssp uwtable(sync)
+define internal fastcc void @_ZL9sme_crashv() unnamed_addr #1 {
+; CHECK-LABEL: _ZL9sme_crashv:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    stp d15, d14, [sp, #-96]! ; 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 96
+; CHECK-NEXT:    stp d13, d12, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp d11, d10, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp d9, d8, [sp, #48] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x28, x27, [sp, #64] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #80] ; 16-byte Folded Spill
+; CHECK-NEXT:    add x29, sp, #80
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset w27, -24
+; CHECK-NEXT:    .cfi_offset w28, -32
+; CHECK-NEXT:    .cfi_offset b8, -40
+; CHECK-NEXT:    .cfi_offset b9, -48
+; CHECK-NEXT:    .cfi_offset b10, -56
+; CHECK-NEXT:    .cfi_offset b11, -64
+; CHECK-NEXT:    .cfi_offset b12, -72
+; CHECK-NEXT:    .cfi_offset b13, -80
+; CHECK-NEXT:    .cfi_offset b14, -88
+; CHECK-NEXT:    .cfi_offset b15, -96
+; CHECK-NEXT:    .cfi_remember_state
+; CHECK-NEXT:    sub x9, sp, #160
+; CHECK-NEXT:    and sp, x9, #0xffffffffffffff00
+; CHECK-NEXT:  Lloh0:
+; CHECK-NEXT:    adrp x8, ___stack_chk_guard@GOTPAGE
+; CHECK-NEXT:  Lloh1:
+; CHECK-NEXT:    ldr x8, [x8, ___stack_chk_guard@GOTPAGEOFF]
+; CHECK-NEXT:  Lloh2:
+; CHECK-NEXT:    ldr x8, [x8]
+; CHECK-NEXT:    str x8, [sp, #152]
+; CHECK-NEXT:    mov z0.b, #0 ; =0x0
+; CHECK-NEXT:    stp q0, q0, [sp, #32]
+; CHECK-NEXT:    stp q0, q0, [sp]
+; CHECK-NEXT:    mov x8, sp
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    st1w { z0.s }, p0, [x8]
+; CHECK-EMPTY:
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ldr x8, [sp, #152]
+; CHECK-NEXT:  Lloh3:
+; CHECK-NEXT:    adrp x9, ___stack_chk_guard@GOTPAGE
+; CHECK-NEXT:  Lloh4:
+; CHECK-NEXT:    ldr x9, [x9, ___stack_chk_guard@GOTPAGEOFF]
+; CHECK-NEXT:  Lloh5:
+; CHECK-NEXT:    ldr x9, [x9]
+; CHECK-NEXT:    cmp x9, x8
+; CHECK-NEXT:    b.ne LBB1_2
+; CHECK-NEXT:  ; %bb.1: ; %entry
+; CHECK-NEXT:    sub sp, x29, #80
+; CHECK-NEXT:    .cfi_def_cfa wsp, 96
+; CHECK-NEXT:    ldp x29, x30, [sp, #80] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x28, x27, [sp, #64] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d9, d8, [sp, #48] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d11, d10, [sp, #32] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d13, d12, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d15, d14, [sp], #96 ; 16-byte Folded Reload
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    .cfi_restore w30
+; CHECK-NEXT:    .cfi_restore w29
+; CHECK-NEXT:    .cfi_restore w27
+; CHECK-NEXT:    .cfi_restore w28
+; CHECK-NEXT:    .cfi_restore b8
+; CHECK-NEXT:    .cfi_restore b9
+; CHECK-NEXT:    .cfi_restore b10
+; CHECK-NEXT:    .cfi_restore b11
+; CHECK-NEXT:    .cfi_restore b12
+; CHECK-NEXT:    .cfi_restore b13
+; CHECK-NEXT:    .cfi_restore b14
+; CHECK-NEXT:    .cfi_restore b15
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  LBB1_2: ; %entry
+; CHECK-NEXT:    .cfi_restore_state
+; CHECK-NEXT:    smstop sm
+; CHECK-NEXT:    bl ___stack_chk_fail
+; CHECK-NEXT:    smstart sm
+; CHECK-NEXT:    .loh AdrpLdrGotLdr Lloh3, Lloh4, Lloh5
+; CHECK-NEXT:    .loh AdrpLdrGotLdr Lloh0, Lloh1, Lloh2
+entry:
+  %uu = alloca [16 x float], align 256
+  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %uu) #5
+  call void @llvm.memset.p0.i64(ptr noundef nonnull align 256 dereferenceable(64) %uu, i8 0, i64 64, i1 false)
+  call void asm sideeffect "ptrue p0.s\0Ast1w { z0.s }, p0, [$0]\0A", "r"(ptr nonnull %uu) #5
+  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %uu) #5
+  ret void
+}
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #2
+
+; Function Attrs: mustprogress nocallback nofree nounwind willreturn memory(argmem: write)
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #3
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #2
+
+attributes #0 = { mustprogress norecurse nounwind ssp uwtable(sync) "stack-protector-buffer-size"="8" "target-cpu"="apple-a16" "target-features"="+sme,+sme-f64f64,+sme2" }
+attributes #1 = { mustprogress norecurse nounwind ssp uwtable(sync) "aarch64_pstate_sm_enabled" "stack-protector-buffer-size"="8" "target-cpu"="apple-a16" "target-features"="+sme,+sme-f64f64,+sme2" }
+attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+attributes #3 = { mustprogress nocallback nofree nounwind willreturn memory(argmem: write) }
+attributes #4 = { "aarch64_pstate_sm_enabled" "no-builtin-calloc" "no-builtin-stpcpy" }
+attributes #5 = { nounwind }
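
For context only (not part of this commit): a rough, hypothetical C++ analogue of the reduced IR test above, assuming Clang's SME ACLE __arm_streaming keyword and a Darwin target built with +sme but without +sve. The call from main() into the streaming function is where smstart/smstop are emitted, which is the point at which the backend previously tried to spill VG and crashed. The function and variable names mirror the test; the exact compiler flags and inline-asm constraints are illustrative.

// Hypothetical reproducer sketch; names taken from the IR test, everything
// else is an assumption rather than the source the test was reduced from.
#include <cstring>

static void sme_crash() __arm_streaming {
  alignas(256) float uu[16];
  std::memset(uu, 0, sizeof(uu));
  // Streaming-mode SVE store via inline asm, as in the IR test above.
  asm volatile("ptrue p0.s\n\t"
               "st1w { z0.s }, p0, [%0]\n\t"
               :
               : "r"(uu)
               : "memory");
}

int main() {
  sme_crash(); // streaming-mode change (smstart sm / smstop sm) around this call
  return 0;
}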