author     Benjamin Maxwell <benjamin.maxwell@arm.com>    2025-10-02 19:05:14 +0100
committer  GitHub <noreply@github.com>                    2025-10-02 19:05:14 +0100
commit     8f67cdd9b7f4ffa3cca552b00d58e72dba66b924 (patch)
tree       b4b257e477c351c1ba26ef3b54b21abcb1ec7307 /llvm/lib
parent     b86ddae1da651f921125a9864b45a5b11bc3b1c0 (diff)
[AArch64][SME] Support split ZPR and PPR area allocation (#142392)
For a while we have supported the `-aarch64-stack-hazard-size=<size>` option,
which adds "hazard padding" between GPRs and FPR/ZPRs. However, there is
currently a hole in this mitigation, as PPR and FPR/ZPR accesses to the same
area also cause streaming memory hazards (this is noted by
`-pass-remarks-analysis=sme -aarch64-stack-hazard-remark-size=<val>`), and the
current stack layout places PPRs and ZPRs within the same area. Which looks
like:

```
-------------------------------------  Higher address
| callee-saved gpr registers        |
|---------------------------------- |
| lr,fp (a.k.a. "frame record")     |
|-----------------------------------| <- fp(=x29)
| <hazard padding>                  |
|-----------------------------------|
| callee-saved fp/simd/SVE regs     |
|-----------------------------------|
| SVE stack objects                 |
|-----------------------------------|
| local variables of fixed size     |
| <FPR>                             |
| <hazard padding>                  |
| <GPR>                             |
------------------------------------| <- sp
|                                   | Lower address
```

With this patch the stack (and hazard padding) is rearranged so that hazard
padding is placed between the PPRs and ZPRs rather than within the (fixed
size) callee-save region. Which looks something like this:

```
-------------------------------------  Higher address
| callee-saved gpr registers        |
|---------------------------------- |
| lr,fp (a.k.a. "frame record")     |
|-----------------------------------| <- fp(=x29)
| callee-saved PPRs                 |
| PPR stack objects                 | (These are SVE predicates)
|-----------------------------------|
| <hazard padding>                  |
|-----------------------------------|
| callee-saved ZPR regs             | (These are SVE vectors)
| ZPR stack objects                 | Note: FPRs are promoted to ZPRs
|-----------------------------------|
| local variables of fixed size     |
| <FPR>                             |
| <hazard padding>                  |
| <GPR>                             |
------------------------------------| <- sp
|                                   | Lower address
```

This layout is only enabled if:

* SplitSVEObjects is enabled (`-aarch64-split-sve-objects`)
  - This may be enabled by default in a later patch
* Streaming memory hazards are present
  - (`-aarch64-stack-hazard-size=<val>` != 0)
* PPRs and FPRs/ZPRs are on the stack
* There is no stack realignment or variable-sized objects
  - Supporting these is left as a TODO for now

Additionally, any FPR callee-saves that are present will be promoted to ZPRs.
This prevents stack hazards between FPRs and GPRs in the fixed-size
callee-save area (which would otherwise require more hazard padding, or
moving the FPR callee-saves).

This layout should resolve the hole in the hazard padding mitigation, and is
not intended to change codegen for non-SME code.
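The addressing consequence of the split layout can be made concrete with a
small standalone sketch. This is not the in-tree
`resolveFrameOffsetReference` implementation; the types, the helper name, and
the choice to pass the hazard padding separately are illustrative
assumptions. It only shows that, under the split layout, a ZPR object
addressed from the frame pointer must additionally skip the whole PPR area
plus the hazard padding now sitting between the two areas, while a PPR object
is reachable directly below the frame record.

```
// Standalone sketch (not the in-tree API). Scalable parts are bytes that
// scale with vscale; fixed parts are ordinary bytes.
#include <cassert>
#include <cstdint>

struct Offset {
  int64_t Fixed = 0;     // ordinary bytes
  int64_t Scalable = 0;  // bytes that scale with vscale
};

enum class SVEArea { Predicates /*PPRs*/, Vectors /*ZPRs*/ };

// ZPR objects sit below the PPR area and the hazard padding, so addressing
// them from the frame pointer must step over both; PPR objects only use
// their offset within the PPR area.
Offset fpRelativeSVEOffset(SVEArea Area, int64_t ObjectOffsetScalable,
                           int64_t PPRAreaScalable, int64_t HazardPadding) {
  Offset O;
  O.Scalable = ObjectOffsetScalable; // offset within its own area (negative)
  if (Area == SVEArea::Vectors) {
    O.Scalable -= PPRAreaScalable;   // step over the predicate area
    O.Fixed -= HazardPadding;        // and the hazard padding below it
  }
  return O;
}

int main() {
  // Example: hazard size 1024, a 16-scalable-byte PPR area, and a ZPR spill
  // at offset -16 within the ZPR area.
  Offset O = fpRelativeSVEOffset(SVEArea::Vectors, /*ObjectOffset=*/-16,
                                 /*PPRArea=*/16, /*HazardPadding=*/1024);
  assert(O.Scalable == -32 && O.Fixed == -1024);
  return 0;
}
```

In the diff below the same effect is achieved by folding the hazard padding
into the fixed component of `getPPRStackSize` and subtracting the PPR stack
size when resolving offsets to `ScalableVector` (ZPR) objects.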
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp      |   2
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp   |   3
-rw-r--r--  llvm/lib/Target/AArch64/AArch64FrameLowering.cpp       | 297
-rw-r--r--  llvm/lib/Target/AArch64/AArch64FrameLowering.h         |   8
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp        |   4
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp        |   4
-rw-r--r--  llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h   |  20
-rw-r--r--  llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp    | 125
-rw-r--r--  llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp        |  11
9 files changed, 384 insertions, 90 deletions
diff --git a/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp b/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp
index ec75dc3..64e5cd5 100644
--- a/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp
+++ b/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp
@@ -72,7 +72,7 @@ struct StackFrameLayoutAnalysis {
: Slot(Idx), Size(MFI.getObjectSize(Idx)),
Align(MFI.getObjectAlign(Idx).value()), Offset(Offset),
SlotTy(Invalid), Scalable(false) {
- Scalable = MFI.isScalableStackID(Idx);
+ Scalable = MFI.hasScalableStackID(Idx);
if (MFI.isSpillSlotObjectIndex(Idx))
SlotTy = SlotType::Spill;
else if (MFI.isFixedObjectIndex(Idx))
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 79655e1..0f4bbfc3 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -1610,7 +1610,8 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
int BaseOffset = -AFI->getTaggedBasePointerOffset();
Register FrameReg;
StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
- MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
+ MF, BaseOffset, false /*isFixed*/, TargetStackID::Default /*StackID*/,
+ FrameReg,
/*PreferFP=*/false,
/*ForSimm=*/true);
Register SrcReg = FrameReg;
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index f5f7b65..8d6eb91 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -56,15 +56,20 @@
// | async context if needed |
// | (a.k.a. "frame record") |
// |-----------------------------------| <- fp(=x29)
-// | <hazard padding> |
-// |-----------------------------------|
-// | |
-// | callee-saved fp/simd/SVE regs |
-// | |
-// |-----------------------------------|
-// | |
-// | SVE stack objects |
-// | |
+//   Default SVE stack layout                Split SVE objects
+//  (aarch64-split-sve-objects=false)   (aarch64-split-sve-objects=true)
+// |-----------------------------------| |-----------------------------------|
+// | <hazard padding> | | callee-saved PPR registers |
+// |-----------------------------------| |-----------------------------------|
+// | | | PPR stack objects |
+// | callee-saved fp/simd/SVE regs | |-----------------------------------|
+// | | | <hazard padding> |
+// |-----------------------------------| |-----------------------------------|
+// | | | callee-saved ZPR/FPR registers |
+// | SVE stack objects | |-----------------------------------|
+// | | | ZPR stack objects |
+// |-----------------------------------| |-----------------------------------|
+// ^ NB: FPR CSRs are promoted to ZPRs
// |-----------------------------------|
// |.empty.space.to.make.part.below....|
// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
@@ -274,6 +279,11 @@ static cl::opt<bool> OrderFrameObjects("aarch64-order-frame-objects",
cl::desc("sort stack allocations"),
cl::init(true), cl::Hidden);
+static cl::opt<bool>
+ SplitSVEObjects("aarch64-split-sve-objects",
+ cl::desc("Split allocation of ZPR & PPR objects"),
+ cl::init(false), cl::Hidden);
+
cl::opt<bool> EnableHomogeneousPrologEpilog(
"homogeneous-prolog-epilog", cl::Hidden,
cl::desc("Emit homogeneous prologue and epilogue for the size "
@@ -329,8 +339,7 @@ enum class AssignObjectOffsets { No, Yes };
/// each object. If AssignOffsets is "Yes", the offsets get assigned (and SVE
/// stack sizes set). Returns the size of the SVE stack.
static SVEStackSizes determineSVEStackSizes(MachineFunction &MF,
- AssignObjectOffsets AssignOffsets,
- bool SplitSVEObjects = false);
+ AssignObjectOffsets AssignOffsets);
static unsigned getStackHazardSize(const MachineFunction &MF) {
return MF.getSubtarget<AArch64Subtarget>().getStreamingHazardSize();
@@ -350,8 +359,13 @@ AArch64FrameLowering::getZPRStackSize(const MachineFunction &MF) const {
StackOffset
AArch64FrameLowering::getPPRStackSize(const MachineFunction &MF) const {
+ // With split SVE objects, the hazard padding is added to the PPR region,
+ // which places it between the [GPR, PPR] area and the [ZPR, FPR] area. This
+ // avoids hazards between both GPRs and FPRs and ZPRs and PPRs.
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- return StackOffset::getScalable(AFI->getStackSizePPR());
+ return StackOffset::get(AFI->hasSplitSVEObjects() ? getStackHazardSize(MF)
+ : 0,
+ AFI->getStackSizePPR());
}
// Conservatively, returns true if the function is likely to have SVE vectors
@@ -368,7 +382,7 @@ static bool isLikelyToHaveSVEStack(const AArch64FrameLowering &AFL,
const MachineFrameInfo &MFI = MF.getFrameInfo();
for (int FI = MFI.getObjectIndexBegin(); FI < MFI.getObjectIndexEnd(); FI++) {
- if (MFI.isScalableStackID(FI))
+ if (MFI.hasScalableStackID(FI))
return true;
}
@@ -1249,11 +1263,21 @@ AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
bool FPAfterSVECalleeSaves =
isTargetWindows(MF) && AFI->getSVECalleeSavedStackSize();
- if (MFI.isScalableStackID(FI)) {
+ if (MFI.hasScalableStackID(FI)) {
if (FPAfterSVECalleeSaves &&
- -ObjectOffset <= (int64_t)AFI->getSVECalleeSavedStackSize())
+ -ObjectOffset <= (int64_t)AFI->getSVECalleeSavedStackSize()) {
+ assert(!AFI->hasSplitSVEObjects() &&
+ "split-sve-objects not supported with FPAfterSVECalleeSaves");
return StackOffset::getScalable(ObjectOffset);
- return StackOffset::get(-((int64_t)AFI->getCalleeSavedStackSize()),
+ }
+ StackOffset AccessOffset{};
+ // The scalable vectors are below (lower address) the scalable predicates
+ // with split SVE objects, so we must subtract the size of the predicates.
+ if (AFI->hasSplitSVEObjects() &&
+ MFI.getStackID(FI) == TargetStackID::ScalableVector)
+ AccessOffset = -PPRStackSize;
+ return AccessOffset +
+ StackOffset::get(-((int64_t)AFI->getCalleeSavedStackSize()),
ObjectOffset);
}
@@ -1315,14 +1339,15 @@ StackOffset AArch64FrameLowering::resolveFrameIndexReference(
const auto &MFI = MF.getFrameInfo();
int64_t ObjectOffset = MFI.getObjectOffset(FI);
bool isFixed = MFI.isFixedObjectIndex(FI);
- bool isSVE = MFI.isScalableStackID(FI);
- return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, isSVE, FrameReg,
- PreferFP, ForSimm);
+ auto StackID = static_cast<TargetStackID::Value>(MFI.getStackID(FI));
+ return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, StackID,
+ FrameReg, PreferFP, ForSimm);
}
StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
- const MachineFunction &MF, int64_t ObjectOffset, bool isFixed, bool isSVE,
- Register &FrameReg, bool PreferFP, bool ForSimm) const {
+ const MachineFunction &MF, int64_t ObjectOffset, bool isFixed,
+ TargetStackID::Value StackID, Register &FrameReg, bool PreferFP,
+ bool ForSimm) const {
const auto &MFI = MF.getFrameInfo();
const auto *RegInfo = static_cast<const AArch64RegisterInfo *>(
MF.getSubtarget().getRegisterInfo());
@@ -1333,8 +1358,11 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
int64_t Offset = getStackOffset(MF, ObjectOffset).getFixed();
bool isCSR =
!isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize(MFI));
+ bool isSVE = MFI.isScalableStackID(StackID);
- const StackOffset SVEStackSize = getSVEStackSize(MF);
+ StackOffset ZPRStackSize = getZPRStackSize(MF);
+ StackOffset PPRStackSize = getPPRStackSize(MF);
+ StackOffset SVEStackSize = ZPRStackSize + PPRStackSize;
// Use frame pointer to reference fixed objects. Use it for locals if
// there are VLAs or a dynamically realigned SP (and thus the SP isn't
@@ -1409,12 +1437,25 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
isTargetWindows(MF) && AFI->getSVECalleeSavedStackSize();
if (isSVE) {
- StackOffset FPOffset =
- StackOffset::get(-AFI->getCalleeSaveBaseToFrameRecordOffset(), ObjectOffset);
+ StackOffset FPOffset = StackOffset::get(
+ -AFI->getCalleeSaveBaseToFrameRecordOffset(), ObjectOffset);
StackOffset SPOffset =
SVEStackSize +
StackOffset::get(MFI.getStackSize() - AFI->getCalleeSavedStackSize(),
ObjectOffset);
+
+ // With split SVE objects the ObjectOffset is relative to the split area
+ // (i.e. the PPR area or ZPR area respectively).
+ if (AFI->hasSplitSVEObjects() && StackID == TargetStackID::ScalableVector) {
+ // If we're accessing an SVE vector with split SVE objects...
+ // - From the FP we need to move down past the PPR area:
+ FPOffset -= PPRStackSize;
+ // - From the SP we only need to move up to the ZPR area:
+ SPOffset -= PPRStackSize;
+ // Note: `SPOffset = SVEStackSize + ...`, so `-= PPRStackSize` results in
+ // `SPOffset = ZPRStackSize + ...`.
+ }
+
if (FPAfterSVECalleeSaves) {
FPOffset += StackOffset::getScalable(AFI->getSVECalleeSavedStackSize());
if (-ObjectOffset <= (int64_t)AFI->getSVECalleeSavedStackSize()) {
@@ -1422,6 +1463,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
SPOffset += StackOffset::getFixed(AFI->getCalleeSavedStackSize());
}
}
+
// Always use the FP for SVE spills if available and beneficial.
if (hasFP(MF) && (SPOffset.getFixed() ||
FPOffset.getScalable() < SPOffset.getScalable() ||
@@ -1429,13 +1471,13 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
FrameReg = RegInfo->getFrameRegister(MF);
return FPOffset;
}
-
FrameReg = RegInfo->hasBasePointer(MF) ? RegInfo->getBaseRegister()
: (unsigned)AArch64::SP;
+
return SPOffset;
}
- StackOffset ScalableOffset = {};
+ StackOffset SVEAreaOffset = {};
if (FPAfterSVECalleeSaves) {
// In this stack layout, the FP is in between the callee saves and other
// SVE allocations.
@@ -1443,25 +1485,25 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
StackOffset::getScalable(AFI->getSVECalleeSavedStackSize());
if (UseFP) {
if (isFixed)
- ScalableOffset = SVECalleeSavedStack;
+ SVEAreaOffset = SVECalleeSavedStack;
else if (!isCSR)
- ScalableOffset = SVECalleeSavedStack - SVEStackSize;
+ SVEAreaOffset = SVECalleeSavedStack - SVEStackSize;
} else {
if (isFixed)
- ScalableOffset = SVEStackSize;
+ SVEAreaOffset = SVEStackSize;
else if (isCSR)
- ScalableOffset = SVEStackSize - SVECalleeSavedStack;
+ SVEAreaOffset = SVEStackSize - SVECalleeSavedStack;
}
} else {
if (UseFP && !(isFixed || isCSR))
- ScalableOffset = -SVEStackSize;
+ SVEAreaOffset = -SVEStackSize;
if (!UseFP && (isFixed || isCSR))
- ScalableOffset = SVEStackSize;
+ SVEAreaOffset = SVEStackSize;
}
if (UseFP) {
FrameReg = RegInfo->getFrameRegister(MF);
- return StackOffset::getFixed(FPOffset) + ScalableOffset;
+ return StackOffset::getFixed(FPOffset) + SVEAreaOffset;
}
// Use the base pointer if we have one.
@@ -1478,7 +1520,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
Offset -= AFI->getLocalStackSize();
}
- return StackOffset::getFixed(Offset) + ScalableOffset;
+ return StackOffset::getFixed(Offset) + SVEAreaOffset;
}
static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
@@ -1635,14 +1677,25 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
RegInc = -1;
FirstReg = Count - 1;
}
+
bool FPAfterSVECalleeSaves = IsWindows && AFI->getSVECalleeSavedStackSize();
- int ScalableByteOffset = FPAfterSVECalleeSaves
- ? 0
- : AFI->getZPRCalleeSavedStackSize() +
- AFI->getPPRCalleeSavedStackSize();
+
+ int ZPRByteOffset = 0;
+ int PPRByteOffset = 0;
+ bool SplitPPRs = AFI->hasSplitSVEObjects();
+ if (SplitPPRs) {
+ ZPRByteOffset = AFI->getZPRCalleeSavedStackSize();
+ PPRByteOffset = AFI->getPPRCalleeSavedStackSize();
+ } else if (!FPAfterSVECalleeSaves) {
+ ZPRByteOffset =
+ AFI->getZPRCalleeSavedStackSize() + AFI->getPPRCalleeSavedStackSize();
+ // Unused: Everything goes in ZPR space.
+ PPRByteOffset = 0;
+ }
+
bool NeedGapToAlignStack = AFI->hasCalleeSaveStackFreeSpace();
Register LastReg = 0;
- bool HasCSHazardPadding = AFI->hasStackHazardSlotIndex();
+ bool HasCSHazardPadding = AFI->hasStackHazardSlotIndex() && !SplitPPRs;
// When iterating backwards, the loop condition relies on unsigned wraparound.
for (unsigned i = FirstReg; i < Count; i += RegInc) {
@@ -1671,6 +1724,10 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
llvm_unreachable("Unsupported register class.");
}
+ int &ScalableByteOffset = RPI.Type == RegPairInfo::PPR && SplitPPRs
+ ? PPRByteOffset
+ : ZPRByteOffset;
+
// Add the stack hazard size as we transition from GPR->FPR CSRs.
if (HasCSHazardPadding &&
(!LastReg || !AArch64InstrInfo::isFpOrNEON(LastReg)) &&
@@ -2227,6 +2284,13 @@ static std::optional<int> getLdStFrameID(const MachineInstr &MI,
return getMMOFrameID(*MI.memoperands_begin(), MFI);
}
+// Returns true if the LDST MachineInstr \p MI is a PPR access.
+static bool isPPRAccess(const MachineInstr &MI) {
+ return MI.getOpcode() != AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO &&
+ MI.getOpcode() != AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO &&
+ AArch64::PPRRegClass.contains(MI.getOperand(0).getReg());
+}
+
// Check if a Hazard slot is needed for the current function, and if so create
// one for it. The index is stored in AArch64FunctionInfo->StackHazardSlotIndex,
// which can be used to determine if any hazard padding is needed.
@@ -2250,25 +2314,50 @@ void AArch64FrameLowering::determineStackHazardSlot(
bool HasFPRCSRs = any_of(SavedRegs.set_bits(), [](unsigned Reg) {
return AArch64::FPR64RegClass.contains(Reg) ||
AArch64::FPR128RegClass.contains(Reg) ||
- AArch64::ZPRRegClass.contains(Reg) ||
- AArch64::PPRRegClass.contains(Reg);
+ AArch64::ZPRRegClass.contains(Reg);
+ });
+ bool HasPPRCSRs = any_of(SavedRegs.set_bits(), [](unsigned Reg) {
+ return AArch64::PPRRegClass.contains(Reg);
});
bool HasFPRStackObjects = false;
- if (!HasFPRCSRs) {
- std::vector<unsigned> FrameObjects(MFI.getObjectIndexEnd());
+ bool HasPPRStackObjects = false;
+ if (!HasFPRCSRs || SplitSVEObjects) {
+ enum SlotType : uint8_t {
+ Unknown = 0,
+ ZPRorFPR = 1 << 0,
+ PPR = 1 << 1,
+ GPR = 1 << 2,
+ LLVM_MARK_AS_BITMASK_ENUM(GPR)
+ };
+
+ // Find stack slots solely used for one kind of register (ZPR, PPR, etc.),
+ // based on the kinds of accesses used in the function.
+ SmallVector<SlotType> SlotTypes(MFI.getObjectIndexEnd(), SlotType::Unknown);
for (auto &MBB : MF) {
for (auto &MI : MBB) {
std::optional<int> FI = getLdStFrameID(MI, MFI);
- if (FI && *FI >= 0 && *FI < (int)FrameObjects.size()) {
- if (MFI.isScalableStackID(*FI) || AArch64InstrInfo::isFpOrNEON(MI))
- FrameObjects[*FI] |= 2;
- else
- FrameObjects[*FI] |= 1;
+ if (!FI || FI < 0 || FI > int(SlotTypes.size()))
+ continue;
+ if (MFI.hasScalableStackID(*FI)) {
+ SlotTypes[*FI] |=
+ isPPRAccess(MI) ? SlotType::PPR : SlotType::ZPRorFPR;
+ } else {
+ SlotTypes[*FI] |= AArch64InstrInfo::isFpOrNEON(MI)
+ ? SlotType::ZPRorFPR
+ : SlotType::GPR;
}
}
}
- HasFPRStackObjects =
- any_of(FrameObjects, [](unsigned B) { return (B & 3) == 2; });
+
+ for (int FI = 0; FI < int(SlotTypes.size()); ++FI) {
+ HasFPRStackObjects |= SlotTypes[FI] == SlotType::ZPRorFPR;
+ // For SplitSVEObjects remember that this stack slot is a predicate, this
+ // will be needed later when determining the frame layout.
+ if (SlotTypes[FI] == SlotType::PPR) {
+ MFI.setStackID(FI, TargetStackID::ScalablePredicateVector);
+ HasPPRStackObjects = true;
+ }
+ }
}
if (HasFPRCSRs || HasFPRStackObjects) {
@@ -2277,6 +2366,78 @@ void AArch64FrameLowering::determineStackHazardSlot(
<< StackHazardSize << "\n");
AFI->setStackHazardSlotIndex(ID);
}
+
+ // Determine if we should use SplitSVEObjects. This should only be used if
+ // there's a possibility of a stack hazard between PPRs and ZPRs or FPRs.
+ if (SplitSVEObjects) {
+ if (!HasPPRCSRs && !HasPPRStackObjects) {
+ LLVM_DEBUG(
+ dbgs() << "Not using SplitSVEObjects as no PPRs are on the stack\n");
+ return;
+ }
+
+ if (!HasFPRCSRs && !HasFPRStackObjects) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Not using SplitSVEObjects as no FPRs or ZPRs are on the stack\n");
+ return;
+ }
+
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ if (MFI.hasVarSizedObjects() || TRI->hasStackRealignment(MF)) {
+ LLVM_DEBUG(dbgs() << "SplitSVEObjects is not supported with variable "
+ "sized objects or realignment\n");
+ return;
+ }
+
+ if (arePPRsSpilledAsZPR(MF)) {
+ LLVM_DEBUG(dbgs() << "SplitSVEObjects is not supported with "
+ "-aarch64-enable-zpr-predicate-spills");
+ return;
+ }
+
+ // If another calling convention is explicitly set FPRs can't be promoted to
+ // ZPR callee-saves.
+ if (!is_contained({CallingConv::C, CallingConv::Fast,
+ CallingConv::AArch64_SVE_VectorCall},
+ MF.getFunction().getCallingConv())) {
+ LLVM_DEBUG(
+ dbgs() << "Calling convention is not supported with SplitSVEObjects");
+ return;
+ }
+
+ [[maybe_unused]] const AArch64Subtarget &Subtarget =
+ MF.getSubtarget<AArch64Subtarget>();
+ assert(Subtarget.isSVEorStreamingSVEAvailable() &&
+ "Expected SVE to be available for PPRs");
+
+ // With SplitSVEObjects the CS hazard padding is placed between the
+ // PPRs and ZPRs. If there are any FPR CS there would be a hazard between
+ // them and the CS GPRs. Avoid this by promoting all FPR CS to ZPRs.
+ BitVector FPRZRegs(SavedRegs.size());
+ for (size_t Reg = 0, E = SavedRegs.size(); HasFPRCSRs && Reg < E; ++Reg) {
+ BitVector::reference RegBit = SavedRegs[Reg];
+ if (!RegBit)
+ continue;
+ unsigned SubRegIdx = 0;
+ if (AArch64::FPR64RegClass.contains(Reg))
+ SubRegIdx = AArch64::dsub;
+ else if (AArch64::FPR128RegClass.contains(Reg))
+ SubRegIdx = AArch64::zsub;
+ else
+ continue;
+ // Clear the bit for the FPR save.
+ RegBit = false;
+ // Mark that we should save the corresponding ZPR.
+ Register ZReg =
+ TRI->getMatchingSuperReg(Reg, SubRegIdx, &AArch64::ZPRRegClass);
+ FPRZRegs.set(ZReg);
+ }
+ SavedRegs |= FPRZRegs;
+
+ AFI->setSplitSVEObjects(true);
+ LLVM_DEBUG(dbgs() << "SplitSVEObjects enabled!\n");
+ }
}
void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
@@ -2410,6 +2571,11 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
SavedRegs.set(AArch64::X18);
}
+ // Determine if a Hazard slot should be used and where it should go.
+ // If SplitSVEObjects is used, the hazard padding is placed between the PPRs
+ // and ZPRs. Otherwise, it goes in the callee save area.
+ determineStackHazardSlot(MF, SavedRegs);
+
// Calculates the callee saved stack size.
unsigned CSStackSize = 0;
unsigned ZPRCSStackSize = 0;
@@ -2434,10 +2600,8 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// only 64-bit GPRs can be added to SavedRegs.
unsigned NumSavedRegs = SavedRegs.count();
- // Determine if a Hazard slot should be used, and increase the CSStackSize by
- // StackHazardSize if so.
- determineStackHazardSlot(MF, SavedRegs);
- if (AFI->hasStackHazardSlotIndex())
+ // If we have hazard padding in the CS area add that to the size.
+ if (AFI->isStackHazardIncludedInCalleeSaveArea())
CSStackSize += getStackHazardSize(MF);
// Increase the callee-saved stack size if the function has streaming mode
@@ -2607,7 +2771,7 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
// Create a hazard slot as we switch between GPR and FPR CSRs.
- if (AFI->hasStackHazardSlotIndex() &&
+ if (AFI->isStackHazardIncludedInCalleeSaveArea() &&
(!LastReg || !AArch64InstrInfo::isFpOrNEON(LastReg)) &&
AArch64InstrInfo::isFpOrNEON(Reg)) {
assert(HazardSlotIndex == std::numeric_limits<int>::max() &&
@@ -2646,7 +2810,7 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
}
// Add hazard slot in the case where no FPR CSRs are present.
- if (AFI->hasStackHazardSlotIndex() &&
+ if (AFI->isStackHazardIncludedInCalleeSaveArea() &&
HazardSlotIndex == std::numeric_limits<int>::max()) {
HazardSlotIndex = MFI.CreateStackObject(StackHazardSize, Align(8), true);
LLVM_DEBUG(dbgs() << "Created CSR Hazard at slot " << HazardSlotIndex
@@ -2701,8 +2865,7 @@ static bool getSVECalleeSaveSlotRange(const MachineFrameInfo &MFI,
}
static SVEStackSizes determineSVEStackSizes(MachineFunction &MF,
- AssignObjectOffsets AssignOffsets,
- bool SplitSVEObjects) {
+ AssignObjectOffsets AssignOffsets) {
MachineFrameInfo &MFI = MF.getFrameInfo();
auto *AFI = MF.getInfo<AArch64FunctionInfo>();
@@ -2713,12 +2876,12 @@ static SVEStackSizes determineSVEStackSizes(MachineFunction &MF,
// are included in the SVE vector area.
uint64_t &ZPRStackTop = SVEStack.ZPRStackSize;
uint64_t &PPRStackTop =
- SplitSVEObjects ? SVEStack.PPRStackSize : SVEStack.ZPRStackSize;
+ AFI->hasSplitSVEObjects() ? SVEStack.PPRStackSize : SVEStack.ZPRStackSize;
#ifndef NDEBUG
// First process all fixed stack objects.
for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
- assert(!MFI.isScalableStackID(I) &&
+ assert(!MFI.hasScalableStackID(I) &&
"SVE vectors should never be passed on the stack by value, only by "
"reference.");
#endif
@@ -3393,7 +3556,8 @@ void TagStoreEdit::emitCode(MachineBasicBlock::iterator &InsertI,
Register Reg;
FrameRegOffset = TFI->resolveFrameOffsetReference(
- *MF, FirstTagStore.Offset, false /*isFixed*/, false /*isSVE*/, Reg,
+ *MF, FirstTagStore.Offset, false /*isFixed*/,
+ TargetStackID::Default /*StackID*/, Reg,
/*PreferFP=*/false, /*ForSimm=*/true);
FrameReg = Reg;
FrameRegUpdate = std::nullopt;
@@ -3733,10 +3897,12 @@ bool FrameObjectCompare(const FrameObject &A, const FrameObject &B) {
void AArch64FrameLowering::orderFrameObjects(
const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
- if (!OrderFrameObjects || ObjectsToAllocate.empty())
+ const AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
+
+ if ((!OrderFrameObjects && !AFI.hasSplitSVEObjects()) ||
+ ObjectsToAllocate.empty())
return;
- const AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
const MachineFrameInfo &MFI = MF.getFrameInfo();
std::vector<FrameObject> FrameObjects(MFI.getObjectIndexEnd());
for (auto &Obj : ObjectsToAllocate) {
@@ -3755,7 +3921,8 @@ void AArch64FrameLowering::orderFrameObjects(
if (AFI.hasStackHazardSlotIndex()) {
std::optional<int> FI = getLdStFrameID(MI, MFI);
if (FI && *FI >= 0 && *FI < (int)FrameObjects.size()) {
- if (MFI.isScalableStackID(*FI) || AArch64InstrInfo::isFpOrNEON(MI))
+ if (MFI.getStackID(*FI) == TargetStackID::ScalableVector ||
+ AArch64InstrInfo::isFpOrNEON(MI))
FrameObjects[*FI].Accesses |= FrameObject::AccessFPR;
else
FrameObjects[*FI].Accesses |= FrameObject::AccessGPR;
@@ -4113,7 +4280,7 @@ void AArch64FrameLowering::emitRemarks(
}
unsigned RegTy = StackAccess::AccessType::GPR;
- if (MFI.isScalableStackID(FrameIdx)) {
+ if (MFI.hasScalableStackID(FrameIdx)) {
// SPILL_PPR_TO_ZPR_SLOT_PSEUDO and FILL_PPR_FROM_ZPR_SLOT_PSEUDO
// spill/fill the predicate as a data vector (so are an FPR access).
if (MI.getOpcode() != AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO &&
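The `determineStackHazardSlot` hunk above promotes FPR callee-saves to ZPR
callee-saves when the split layout is used. A minimal standalone sketch of
why this is value-preserving follows; it is not the in-tree
`getMatchingSuperReg` logic, and the string-based register names are an
illustrative assumption. The underlying fact is architectural: the low
64/128 bits of Z<n> alias D<n>/Q<n>, so saving and restoring Z<n> also
preserves the FPR callee-save while moving its slot into the ZPR area below
the hazard padding.

```
// Standalone sketch: map an FPR callee-save name to the covering Z register.
#include <cstdio>
#include <optional>
#include <string>

// d8 -> z8, q10 -> z10, anything else is left alone.
std::optional<std::string> promoteFPRSaveToZPR(const std::string &Reg) {
  if (Reg.size() < 2 || (Reg[0] != 'd' && Reg[0] != 'q'))
    return std::nullopt; // not an FPR callee-save
  return "z" + Reg.substr(1);
}

int main() {
  for (const char *R : {"d8", "q10", "x19"}) {
    auto Z = promoteFPRSaveToZPR(R);
    std::printf("%s -> %s\n", R, Z ? Z->c_str() : "(kept as-is)");
  }
  return 0;
}
```

In the patch itself the mapping is done with `getMatchingSuperReg` on the
`dsub`/`zsub` sub-register indices, and it is gated on the calling convention
so the promoted ZPR saves remain compatible with the function's ABI.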
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index 38aa28b1..32a9bd8 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -69,8 +69,9 @@ public:
bool ForSimm) const;
StackOffset resolveFrameOffsetReference(const MachineFunction &MF,
int64_t ObjectOffset, bool isFixed,
- bool isSVE, Register &FrameReg,
- bool PreferFP, bool ForSimm) const;
+ TargetStackID::Value StackID,
+ Register &FrameReg, bool PreferFP,
+ bool ForSimm) const;
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
ArrayRef<CalleeSavedInfo> CSI,
@@ -155,7 +156,8 @@ public:
/// Returns the size of the entire ZPR stackframe (calleesaves + spills).
StackOffset getZPRStackSize(const MachineFunction &MF) const;
- /// Returns the size of the entire PPR stackframe (calleesaves + spills).
+ /// Returns the size of the entire PPR stackframe (calleesaves + spills +
+ /// hazard padding).
StackOffset getPPRStackSize(const MachineFunction &MF) const;
/// Returns the size of the entire SVE stackframe (PPRs + ZPRs).
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 35bbb0c0..e7b2d20 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -7497,7 +7497,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
int FI = cast<FrameIndexSDNode>(N)->getIndex();
// We can only encode VL scaled offsets, so only fold in frame indexes
// referencing SVE objects.
- if (MFI.isScalableStackID(FI)) {
+ if (MFI.hasScalableStackID(FI)) {
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
return true;
@@ -7543,7 +7543,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
// We can only encode VL scaled offsets, so only fold in frame indexes
// referencing SVE objects.
- if (MFI.isScalableStackID(FI))
+ if (MFI.hasScalableStackID(FI))
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c2a482a..70d5ad7d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -9256,7 +9256,7 @@ void AArch64TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
(MI.getOpcode() == AArch64::ADDXri ||
MI.getOpcode() == AArch64::SUBXri)) {
const MachineOperand &MO = MI.getOperand(1);
- if (MO.isFI() && MF.getFrameInfo().isScalableStackID(MO.getIndex()))
+ if (MO.isFI() && MF.getFrameInfo().hasScalableStackID(MO.getIndex()))
MI.addOperand(MachineOperand::CreateReg(AArch64::VG, /*IsDef=*/false,
/*IsImplicit=*/true));
}
@@ -29608,7 +29608,7 @@ void AArch64TargetLowering::finalizeLowering(MachineFunction &MF) const {
// than doing it here in finalizeLowering.
if (MFI.hasStackProtectorIndex()) {
for (unsigned int i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
- if (MFI.isScalableStackID(i) &&
+ if (MFI.hasScalableStackID(i) &&
MFI.getObjectSSPLayout(i) != MachineFrameInfo::SSPLK_None) {
MFI.setStackID(MFI.getStackProtectorIndex(),
TargetStackID::ScalableVector);
diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index 00c104e..91e64e6 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -137,6 +137,10 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
uint64_t StackSizeZPR = 0;
uint64_t StackSizePPR = 0;
+ /// Are SVE objects (vectors and predicates) split into separate regions on
+ /// the stack.
+ bool SplitSVEObjects = false;
+
/// HasCalculatedStackSizeSVE indicates whether StackSizeZPR/PPR is valid.
bool HasCalculatedStackSizeSVE = false;
@@ -336,7 +340,6 @@ public:
bool isStackRealigned() const { return StackRealigned; }
void setStackRealigned(bool s) { StackRealigned = s; }
-
bool hasCalleeSaveStackFreeSpace() const {
return CalleeSaveStackHasFreeSpace;
}
@@ -438,6 +441,8 @@ public:
}
unsigned getSVECalleeSavedStackSize() const {
+ assert(!hasSplitSVEObjects() &&
+ "ZPRs and PPRs are split. Use get[ZPR|PPR]CalleeSavedStackSize()");
return getZPRCalleeSavedStackSize() + getPPRCalleeSavedStackSize();
}
@@ -446,6 +451,10 @@ public:
return NumLocalDynamicTLSAccesses;
}
+ bool isStackHazardIncludedInCalleeSaveArea() const {
+ return hasStackHazardSlotIndex() && !hasSplitSVEObjects();
+ }
+
std::optional<bool> hasRedZone() const { return HasRedZone; }
void setHasRedZone(bool s) { HasRedZone = s; }
@@ -481,7 +490,14 @@ public:
StackHazardCSRSlotIndex = Index;
}
- bool hasSplitSVEObjects() const { return false; }
+ bool hasSplitSVEObjects() const { return SplitSVEObjects; }
+ void setSplitSVEObjects(bool s) { SplitSVEObjects = s; }
+
+ bool hasSVE_AAPCS(const MachineFunction &MF) const {
+ return hasSplitSVEObjects() || isSVECC() ||
+ MF.getFunction().getCallingConv() ==
+ CallingConv::AArch64_SVE_VectorCall;
+ }
SMEAttrs getSMEFnAttrs() const { return SMEFnAttrs; }
diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
index fec350b..aed137c 100644
--- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
@@ -723,38 +723,73 @@ void AArch64PrologueEmitter::emitPrologue() {
StackOffset PPRLocalsSize = AFL.getPPRStackSize(MF) - PPRCalleeSavesSize;
StackOffset ZPRLocalsSize = AFL.getZPRStackSize(MF) - ZPRCalleeSavesSize;
+ std::optional<MachineBasicBlock::iterator> ZPRCalleeSavesBegin,
+ ZPRCalleeSavesEnd, PPRCalleeSavesBegin, PPRCalleeSavesEnd;
+
StackOffset CFAOffset =
StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes);
MachineBasicBlock::iterator AfterSVESavesI = AfterGPRSavesI;
if (!FPAfterSVECalleeSaves) {
// Process the SVE callee-saves to find the starts/ends of the ZPR and PPR
// areas.
+ PPRCalleeSavesBegin = AfterGPRSavesI;
if (PPRCalleeSavesSize) {
LLVM_DEBUG(dbgs() << "PPRCalleeSavedStackSize = "
<< PPRCalleeSavesSize.getScalable() << "\n");
- assert(isPartOfPPRCalleeSaves(AfterSVESavesI) &&
+ assert(isPartOfPPRCalleeSaves(*PPRCalleeSavesBegin) &&
"Unexpected instruction");
while (isPartOfPPRCalleeSaves(AfterSVESavesI) &&
AfterSVESavesI != MBB.getFirstTerminator())
++AfterSVESavesI;
}
+ PPRCalleeSavesEnd = ZPRCalleeSavesBegin = AfterSVESavesI;
if (ZPRCalleeSavesSize) {
LLVM_DEBUG(dbgs() << "ZPRCalleeSavedStackSize = "
<< ZPRCalleeSavesSize.getScalable() << "\n");
- assert(isPartOfZPRCalleeSaves(AfterSVESavesI) &&
+ assert(isPartOfZPRCalleeSaves(*ZPRCalleeSavesBegin) &&
"Unexpected instruction");
while (isPartOfZPRCalleeSaves(AfterSVESavesI) &&
AfterSVESavesI != MBB.getFirstTerminator())
++AfterSVESavesI;
}
+ ZPRCalleeSavesEnd = AfterSVESavesI;
}
if (EmitAsyncCFI)
emitCalleeSavedSVELocations(AfterSVESavesI);
if (AFI->hasSplitSVEObjects()) {
- reportFatalInternalError("not implemented yet");
+ assert(!FPAfterSVECalleeSaves &&
+ "Cannot use FPAfterSVECalleeSaves with aarch64-split-sve-objects");
+ assert(!AFL.canUseRedZone(MF) &&
+ "Cannot use redzone with aarch64-split-sve-objects");
+ // TODO: Handle HasWinCFI/NeedsWinCFI?
+ assert(!NeedsWinCFI &&
+ "WinCFI with aarch64-split-sve-objects is not supported");
+
+ // Split ZPR and PPR allocation.
+ // Allocate PPR callee saves
+ allocateStackSpace(*PPRCalleeSavesBegin, 0, PPRCalleeSavesSize,
+ EmitAsyncCFI && !HasFP, CFAOffset,
+ MFI.hasVarSizedObjects() || ZPRCalleeSavesSize ||
+ ZPRLocalsSize || PPRLocalsSize);
+ CFAOffset += PPRCalleeSavesSize;
+
+ // Allocate PPR locals + ZPR callee saves
+ assert(PPRCalleeSavesEnd == ZPRCalleeSavesBegin &&
+ "Expected ZPR callee saves after PPR locals");
+ allocateStackSpace(*PPRCalleeSavesEnd, RealignmentPadding,
+ PPRLocalsSize + ZPRCalleeSavesSize,
+ EmitAsyncCFI && !HasFP, CFAOffset,
+ MFI.hasVarSizedObjects() || ZPRLocalsSize);
+ CFAOffset += PPRLocalsSize + ZPRCalleeSavesSize;
+
+ // Allocate ZPR locals
+ allocateStackSpace(*ZPRCalleeSavesEnd, RealignmentPadding,
+ ZPRLocalsSize + StackOffset::getFixed(NumBytes),
+ EmitAsyncCFI && !HasFP, CFAOffset,
+ MFI.hasVarSizedObjects());
} else {
// Allocate space for the callee saves (if any).
StackOffset LocalsSize =
@@ -1195,7 +1230,7 @@ void AArch64PrologueEmitter::emitCalleeSavedGPRLocations(
CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
for (const auto &Info : CSI) {
unsigned FrameIdx = Info.getFrameIdx();
- if (MFI.isScalableStackID(FrameIdx))
+ if (MFI.hasScalableStackID(FrameIdx))
continue;
assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
@@ -1221,8 +1256,10 @@ void AArch64PrologueEmitter::emitCalleeSavedSVELocations(
AFL.getOffsetOfLocalArea();
}
+ StackOffset PPRStackSize = AFL.getPPRStackSize(MF);
for (const auto &Info : CSI) {
- if (!MFI.isScalableStackID(Info.getFrameIdx()))
+ int FI = Info.getFrameIdx();
+ if (!MFI.hasScalableStackID(FI))
continue;
// Not all unwinders may know about SVE registers, so assume the lowest
@@ -1233,9 +1270,13 @@ void AArch64PrologueEmitter::emitCalleeSavedSVELocations(
continue;
StackOffset Offset =
- StackOffset::getScalable(MFI.getObjectOffset(Info.getFrameIdx())) -
+ StackOffset::getScalable(MFI.getObjectOffset(FI)) -
StackOffset::getFixed(AFI->getCalleeSavedStackSize(MFI));
+ if (AFI->hasSplitSVEObjects() &&
+ MFI.getStackID(FI) == TargetStackID::ScalableVector)
+ Offset -= PPRStackSize;
+
CFIBuilder.insertCFIInst(
createCFAOffset(RegInfo, Reg, Offset, IncomingVGOffsetFromDefCFA));
}
@@ -1512,7 +1553,75 @@ void AArch64EpilogueEmitter::emitEpilogue() {
emitCalleeSavedSVERestores(RestoreEnd);
}
} else if (AFI->hasSplitSVEObjects() && SVEStackSize) {
- reportFatalInternalError("not implemented yet");
+ // TODO: Support stack realignment and variable-sized objects.
+ assert(!AFI->isStackRealigned() && !MFI.hasVarSizedObjects() &&
+ "unexpected stack realignment or variable sized objects with split "
+ "SVE stack objects");
+ // SplitSVEObjects. Determine the sizes and starts/ends of the ZPR and PPR
+ // areas.
+ auto ZPRCalleeSavedSize =
+ StackOffset::getScalable(AFI->getZPRCalleeSavedStackSize());
+ auto PPRCalleeSavedSize =
+ StackOffset::getScalable(AFI->getPPRCalleeSavedStackSize());
+ StackOffset PPRLocalsSize = PPRStackSize - PPRCalleeSavedSize;
+ StackOffset ZPRLocalsSize = ZPRStackSize - ZPRCalleeSavedSize;
+
+ MachineBasicBlock::iterator PPRRestoreBegin = FirstGPRRestoreI,
+ PPRRestoreEnd = FirstGPRRestoreI;
+ if (PPRCalleeSavedSize) {
+ PPRRestoreBegin = std::prev(PPRRestoreEnd);
+ while (PPRRestoreBegin != MBB.begin() &&
+ isPartOfPPRCalleeSaves(std::prev(PPRRestoreBegin)))
+ --PPRRestoreBegin;
+ }
+
+ MachineBasicBlock::iterator ZPRRestoreBegin = PPRRestoreBegin,
+ ZPRRestoreEnd = PPRRestoreBegin;
+ if (ZPRCalleeSavedSize) {
+ ZPRRestoreBegin = std::prev(ZPRRestoreEnd);
+ while (ZPRRestoreBegin != MBB.begin() &&
+ isPartOfZPRCalleeSaves(std::prev(ZPRRestoreBegin)))
+ --ZPRRestoreBegin;
+ }
+
+ auto CFAOffset =
+ SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize);
+ if (PPRCalleeSavedSize || ZPRCalleeSavedSize) {
+ // Deallocate the non-SVE locals first before we can deallocate (and
+ // restore callee saves) from the SVE area.
+ auto NonSVELocals = StackOffset::getFixed(NumBytes);
+ emitFrameOffset(MBB, ZPRRestoreBegin, DL, AArch64::SP, AArch64::SP,
+ NonSVELocals, TII, MachineInstr::FrameDestroy, false,
+ false, nullptr, EmitCFI && !HasFP, CFAOffset);
+ NumBytes = 0;
+ CFAOffset -= NonSVELocals;
+ }
+
+ if (ZPRLocalsSize) {
+ emitFrameOffset(MBB, ZPRRestoreBegin, DL, AArch64::SP, AArch64::SP,
+ ZPRLocalsSize, TII, MachineInstr::FrameDestroy, false,
+ false, nullptr, EmitCFI && !HasFP, CFAOffset);
+ CFAOffset -= ZPRLocalsSize;
+ }
+
+ if (PPRLocalsSize || ZPRCalleeSavedSize) {
+ assert(PPRRestoreBegin == ZPRRestoreEnd &&
+ "Expected PPR restores after ZPR");
+ emitFrameOffset(MBB, PPRRestoreBegin, DL, AArch64::SP, AArch64::SP,
+ PPRLocalsSize + ZPRCalleeSavedSize, TII,
+ MachineInstr::FrameDestroy, false, false, nullptr,
+ EmitCFI && !HasFP, CFAOffset);
+ CFAOffset -= PPRLocalsSize + ZPRCalleeSavedSize;
+ }
+ if (PPRCalleeSavedSize) {
+ emitFrameOffset(MBB, PPRRestoreEnd, DL, AArch64::SP, AArch64::SP,
+ PPRCalleeSavedSize, TII, MachineInstr::FrameDestroy,
+ false, false, nullptr, EmitCFI && !HasFP, CFAOffset);
+ }
+
+ // We only emit CFI information for ZPRs so emit CFI after the ZPR restores.
+ if (EmitCFI)
+ emitCalleeSavedSVERestores(ZPRRestoreEnd);
}
if (!HasFP) {
@@ -1670,7 +1779,7 @@ void AArch64EpilogueEmitter::emitCalleeSavedRestores(
CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameDestroy);
for (const auto &Info : CSI) {
- if (SVE != MFI.isScalableStackID(Info.getFrameIdx()))
+ if (SVE != MFI.hasScalableStackID(Info.getFrameIdx()))
continue;
MCRegister Reg = Info.getReg();
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 3f43b70..79975b0 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -71,6 +71,7 @@ bool AArch64RegisterInfo::regNeedsCFI(MCRegister Reg,
const MCPhysReg *
AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
assert(MF && "Invalid MachineFunction pointer.");
+ auto &AFI = *MF->getInfo<AArch64FunctionInfo>();
if (MF->getFunction().getCallingConv() == CallingConv::GHC)
// GHC set of callee saved regs is empty as all those regs are
@@ -101,10 +102,7 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_Win_AArch64_AAPCS_SwiftTail_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::AArch64_VectorCall)
return CSR_Win_AArch64_AAVPCS_SaveList;
- if (MF->getFunction().getCallingConv() ==
- CallingConv::AArch64_SVE_VectorCall)
- return CSR_Win_AArch64_SVE_AAPCS_SaveList;
- if (MF->getInfo<AArch64FunctionInfo>()->isSVECC())
+ if (AFI.hasSVE_AAPCS(*MF))
return CSR_Win_AArch64_SVE_AAPCS_SaveList;
return CSR_Win_AArch64_AAPCS_SaveList;
}
@@ -148,7 +146,7 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
// This is for OSes other than Windows; Windows is a separate case further
// above.
return CSR_AArch64_AAPCS_X18_SaveList;
- if (MF->getInfo<AArch64FunctionInfo>()->isSVECC())
+ if (AFI.hasSVE_AAPCS(*MF))
return CSR_AArch64_SVE_AAPCS_SaveList;
return CSR_AArch64_AAPCS_SaveList;
}
@@ -158,6 +156,7 @@ AArch64RegisterInfo::getDarwinCalleeSavedRegs(const MachineFunction *MF) const {
assert(MF && "Invalid MachineFunction pointer.");
assert(MF->getSubtarget<AArch64Subtarget>().isTargetDarwin() &&
"Invalid subtarget for getDarwinCalleeSavedRegs");
+ auto &AFI = *MF->getInfo<AArch64FunctionInfo>();
if (MF->getFunction().getCallingConv() == CallingConv::CFGuard_Check)
report_fatal_error(
@@ -205,7 +204,7 @@ AArch64RegisterInfo::getDarwinCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_Darwin_AArch64_RT_AllRegs_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::Win64)
return CSR_Darwin_AArch64_AAPCS_Win64_SaveList;
- if (MF->getInfo<AArch64FunctionInfo>()->isSVECC())
+ if (AFI.hasSVE_AAPCS(*MF))
return CSR_Darwin_AArch64_SVE_AAPCS_SaveList;
return CSR_Darwin_AArch64_AAPCS_SaveList;
}