-rw-r--r--  llvm/lib/Target/AArch64/AArch64FrameLowering.cpp   |  24
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp    |  19
-rw-r--r--  llvm/test/CodeGen/AArch64/sme-darwin-no-sve-vg.ll  | 161
3 files changed, 189 insertions, 15 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index bf0eb14..429affe 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1394,6 +1394,18 @@ bool requiresGetVGCall(MachineFunction &MF) {
!MF.getSubtarget<AArch64Subtarget>().hasSVE();
}

+static bool requiresSaveVG(MachineFunction &MF) {
+ AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ // For Darwin platforms we don't save VG for non-SVE functions, even if SME
+ // is enabled with streaming mode changes.
+ if (!AFI->hasStreamingModeChanges())
+ return false;
+ auto &ST = MF.getSubtarget<AArch64Subtarget>();
+ if (ST.isTargetDarwin())
+ return ST.hasSVE();
+ return true;
+}
+
bool isVGInstruction(MachineBasicBlock::iterator MBBI) {
unsigned Opc = MBBI->getOpcode();
if (Opc == AArch64::CNTD_XPiI || Opc == AArch64::RDSVLI_XI ||
@@ -1430,8 +1442,7 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
// functions, we need to do this for both the streaming and non-streaming
// vector length. Move past these instructions if necessary.
MachineFunction &MF = *MBB.getParent();
- AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- if (AFI->hasStreamingModeChanges())
+ if (requiresSaveVG(MF))
while (isVGInstruction(MBBI))
++MBBI;
@@ -1938,7 +1949,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup) &&
!IsSVECalleeSave(MBBI)) {
// Move past instructions generated to calculate VG
- if (AFI->hasStreamingModeChanges())
+ if (requiresSaveVG(MF))
while (isVGInstruction(MBBI))
++MBBI;
@@ -3754,7 +3765,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// non-streaming VG value.
const Function &F = MF.getFunction();
SMEAttrs Attrs(F);
- if (AFI->hasStreamingModeChanges()) {
+ if (requiresSaveVG(MF)) {
if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface())
CSStackSize += 16;
else
@@ -3907,7 +3918,7 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
}

// Insert VG into the list of CSRs, immediately before LR if saved.
- if (AFI->hasStreamingModeChanges()) {
+ if (requiresSaveVG(MF)) {
std::vector<CalleeSavedInfo> VGSaves;
SMEAttrs Attrs(MF.getFunction());
@@ -4636,10 +4647,9 @@ MachineBasicBlock::iterator emitVGSaveRestore(MachineBasicBlock::iterator II,
void AArch64FrameLowering::processFunctionBeforeFrameIndicesReplaced(
MachineFunction &MF, RegScavenger *RS = nullptr) const {
- AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
for (auto &BB : MF)
for (MachineBasicBlock::iterator II = BB.begin(); II != BB.end();) {
- if (AFI->hasStreamingModeChanges())
+ if (requiresSaveVG(MF))
II = emitVGSaveRestore(II, this);
if (StackTaggingMergeSetTag)
II = tryMergeAdjacentSTG(II, this, RS);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index efc1703..bb7aea2 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8762,10 +8762,11 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
SDValue InGlue;
if (RequiresSMChange) {
-
- Chain = DAG.getNode(AArch64ISD::VG_SAVE, DL,
- DAG.getVTList(MVT::Other, MVT::Glue), Chain);
- InGlue = Chain.getValue(1);
+ if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) {
+ Chain = DAG.getNode(AArch64ISD::VG_SAVE, DL,
+ DAG.getVTList(MVT::Other, MVT::Glue), Chain);
+ InGlue = Chain.getValue(1);
+ }
SDValue NewChain = changeStreamingMode(
DAG, DL, CalleeAttrs.hasStreamingInterface(), Chain, InGlue,
@@ -8944,11 +8945,13 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
Result = changeStreamingMode(
DAG, DL, !CalleeAttrs.hasStreamingInterface(), Result, InGlue,
getSMCondition(CallerAttrs, CalleeAttrs), PStateSM);
- InGlue = Result.getValue(1);
- Result =
- DAG.getNode(AArch64ISD::VG_RESTORE, DL,
- DAG.getVTList(MVT::Other, MVT::Glue), {Result, InGlue});
+ if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) {
+ InGlue = Result.getValue(1);
+ Result =
+ DAG.getNode(AArch64ISD::VG_RESTORE, DL,
+ DAG.getVTList(MVT::Other, MVT::Glue), {Result, InGlue});
+ }
}
if (CallerAttrs.requiresEnablingZAAfterCall(CalleeAttrs))
diff --git a/llvm/test/CodeGen/AArch64/sme-darwin-no-sve-vg.ll b/llvm/test/CodeGen/AArch64/sme-darwin-no-sve-vg.ll
new file mode 100644
index 0000000..36a300f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-darwin-no-sve-vg.ll
@@ -0,0 +1,161 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -o - %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "arm64-apple-macosx14.0.0"
+
+; Check we don't crash on Darwin and that we don't try to save VG
+; when only SME (and not SVE) is enabled.
+
+; Function Attrs: mustprogress norecurse nounwind ssp uwtable(sync)
+define noundef i32 @main() local_unnamed_addr #0 {
+; CHECK-LABEL: main:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: stp d15, d14, [sp, #-80]! ; 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 80
+; CHECK-NEXT: stp d13, d12, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #64] ; 16-byte Folded Spill
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: .cfi_offset b8, -24
+; CHECK-NEXT: .cfi_offset b9, -32
+; CHECK-NEXT: .cfi_offset b10, -40
+; CHECK-NEXT: .cfi_offset b11, -48
+; CHECK-NEXT: .cfi_offset b12, -56
+; CHECK-NEXT: .cfi_offset b13, -64
+; CHECK-NEXT: .cfi_offset b14, -72
+; CHECK-NEXT: .cfi_offset b15, -80
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: bl __ZL9sme_crashv
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: mov w0, #0 ; =0x0
+; CHECK-NEXT: ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #80 ; 16-byte Folded Reload
+; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: .cfi_restore w30
+; CHECK-NEXT: .cfi_restore w29
+; CHECK-NEXT: .cfi_restore b8
+; CHECK-NEXT: .cfi_restore b9
+; CHECK-NEXT: .cfi_restore b10
+; CHECK-NEXT: .cfi_restore b11
+; CHECK-NEXT: .cfi_restore b12
+; CHECK-NEXT: .cfi_restore b13
+; CHECK-NEXT: .cfi_restore b14
+; CHECK-NEXT: .cfi_restore b15
+; CHECK-NEXT: ret
+entry:
+ tail call fastcc void @_ZL9sme_crashv() #4
+ ret i32 0
+}
+
+; Function Attrs: mustprogress norecurse nounwind ssp uwtable(sync)
+define internal fastcc void @_ZL9sme_crashv() unnamed_addr #1 {
+; CHECK-LABEL: _ZL9sme_crashv:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: stp d15, d14, [sp, #-96]! ; 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 96
+; CHECK-NEXT: stp d13, d12, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x28, x27, [sp, #64] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #80] ; 16-byte Folded Spill
+; CHECK-NEXT: add x29, sp, #80
+; CHECK-NEXT: .cfi_def_cfa w29, 16
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: .cfi_offset w27, -24
+; CHECK-NEXT: .cfi_offset w28, -32
+; CHECK-NEXT: .cfi_offset b8, -40
+; CHECK-NEXT: .cfi_offset b9, -48
+; CHECK-NEXT: .cfi_offset b10, -56
+; CHECK-NEXT: .cfi_offset b11, -64
+; CHECK-NEXT: .cfi_offset b12, -72
+; CHECK-NEXT: .cfi_offset b13, -80
+; CHECK-NEXT: .cfi_offset b14, -88
+; CHECK-NEXT: .cfi_offset b15, -96
+; CHECK-NEXT: .cfi_remember_state
+; CHECK-NEXT: sub x9, sp, #160
+; CHECK-NEXT: and sp, x9, #0xffffffffffffff00
+; CHECK-NEXT: Lloh0:
+; CHECK-NEXT: adrp x8, ___stack_chk_guard@GOTPAGE
+; CHECK-NEXT: Lloh1:
+; CHECK-NEXT: ldr x8, [x8, ___stack_chk_guard@GOTPAGEOFF]
+; CHECK-NEXT: Lloh2:
+; CHECK-NEXT: ldr x8, [x8]
+; CHECK-NEXT: str x8, [sp, #152]
+; CHECK-NEXT: mov z0.b, #0 ; =0x0
+; CHECK-NEXT: stp q0, q0, [sp, #32]
+; CHECK-NEXT: stp q0, q0, [sp]
+; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: st1w { z0.s }, p0, [x8]
+; CHECK-EMPTY:
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ldr x8, [sp, #152]
+; CHECK-NEXT: Lloh3:
+; CHECK-NEXT: adrp x9, ___stack_chk_guard@GOTPAGE
+; CHECK-NEXT: Lloh4:
+; CHECK-NEXT: ldr x9, [x9, ___stack_chk_guard@GOTPAGEOFF]
+; CHECK-NEXT: Lloh5:
+; CHECK-NEXT: ldr x9, [x9]
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: b.ne LBB1_2
+; CHECK-NEXT: ; %bb.1: ; %entry
+; CHECK-NEXT: sub sp, x29, #80
+; CHECK-NEXT: .cfi_def_cfa wsp, 96
+; CHECK-NEXT: ldp x29, x30, [sp, #80] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp x28, x27, [sp, #64] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #96 ; 16-byte Folded Reload
+; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: .cfi_restore w30
+; CHECK-NEXT: .cfi_restore w29
+; CHECK-NEXT: .cfi_restore w27
+; CHECK-NEXT: .cfi_restore w28
+; CHECK-NEXT: .cfi_restore b8
+; CHECK-NEXT: .cfi_restore b9
+; CHECK-NEXT: .cfi_restore b10
+; CHECK-NEXT: .cfi_restore b11
+; CHECK-NEXT: .cfi_restore b12
+; CHECK-NEXT: .cfi_restore b13
+; CHECK-NEXT: .cfi_restore b14
+; CHECK-NEXT: .cfi_restore b15
+; CHECK-NEXT: ret
+; CHECK-NEXT: LBB1_2: ; %entry
+; CHECK-NEXT: .cfi_restore_state
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: bl ___stack_chk_fail
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: .loh AdrpLdrGotLdr Lloh3, Lloh4, Lloh5
+; CHECK-NEXT: .loh AdrpLdrGotLdr Lloh0, Lloh1, Lloh2
+entry:
+ %uu = alloca [16 x float], align 256
+ call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %uu) #5
+ call void @llvm.memset.p0.i64(ptr noundef nonnull align 256 dereferenceable(64) %uu, i8 0, i64 64, i1 false)
+ call void asm sideeffect "ptrue p0.s\0Ast1w { z0.s }, p0, [$0]\0A", "r"(ptr nonnull %uu) #5
+ call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %uu) #5
+ ret void
+}
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #2
+
+; Function Attrs: mustprogress nocallback nofree nounwind willreturn memory(argmem: write)
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #3
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #2
+
+attributes #0 = { mustprogress norecurse nounwind ssp uwtable(sync) "stack-protector-buffer-size"="8" "target-cpu"="apple-a16" "target-features"="+sme,+sme-f64f64,+sme2" }
+attributes #1 = { mustprogress norecurse nounwind ssp uwtable(sync) "aarch64_pstate_sm_enabled" "stack-protector-buffer-size"="8" "target-cpu"="apple-a16" "target-features"="+sme,+sme-f64f64,+sme2" }
+attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+attributes #3 = { mustprogress nocallback nofree nounwind willreturn memory(argmem: write) }
+attributes #4 = { "aarch64_pstate_sm_enabled" "no-builtin-calloc" "no-builtin-stpcpy" }
+attributes #5 = { nounwind }