aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp3
-rw-r--r--llvm/lib/Target/AArch64/AArch64FrameLowering.cpp243
-rw-r--r--llvm/lib/Target/AArch64/AArch64FrameLowering.h6
-rw-r--r--llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h8
-rw-r--r--llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp386
5 files changed, 450 insertions, 196 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 79655e1..a16d48e 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -1612,7 +1612,8 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
/*PreferFP=*/false,
- /*ForSimm=*/true);
+ /*ForSimm=*/true,
+ /*FI=*/-1);
Register SrcReg = FrameReg;
if (FrameRegOffset) {
// Use output register as temporary.
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index f5f7b65..d88e4c4 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -274,6 +274,11 @@ static cl::opt<bool> OrderFrameObjects("aarch64-order-frame-objects",
cl::desc("sort stack allocations"),
cl::init(true), cl::Hidden);
+static cl::opt<bool>
+ SplitSVEObjects("aarch64-split-sve-objects",
+ cl::desc("Split allocation of ZPR & PPR objects"),
+ cl::init(false), cl::Hidden);
+
cl::opt<bool> EnableHomogeneousPrologEpilog(
"homogeneous-prolog-epilog", cl::Hidden,
cl::desc("Emit homogeneous prologue and epilogue for the size "
@@ -329,8 +334,7 @@ enum class AssignObjectOffsets { No, Yes };
/// each object. If AssignOffsets is "Yes", the offsets get assigned (and SVE
/// stack sizes set). Returns the size of the SVE stack.
static SVEStackSizes determineSVEStackSizes(MachineFunction &MF,
- AssignObjectOffsets AssignOffsets,
- bool SplitSVEObjects = false);
+ AssignObjectOffsets AssignOffsets);
static unsigned getStackHazardSize(const MachineFunction &MF) {
return MF.getSubtarget<AArch64Subtarget>().getStreamingHazardSize();
@@ -351,7 +355,9 @@ AArch64FrameLowering::getZPRStackSize(const MachineFunction &MF) const {
StackOffset
AArch64FrameLowering::getPPRStackSize(const MachineFunction &MF) const {
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- return StackOffset::getScalable(AFI->getStackSizePPR());
+ return StackOffset::get(AFI->hasSplitSVEObjects() ? getStackHazardSize(MF)
+ : 0,
+ AFI->getStackSizePPR());
}
// Conservatively, returns true if the function is likely to have SVE vectors
@@ -516,6 +522,10 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
if (!EnableRedZone)
return false;
+ const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ if (AFI->hasSplitSVEObjects())
+ return false;
+
// Don't use the red zone if the function explicitly asks us not to.
// This is typically used for kernel code.
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
@@ -525,7 +535,6 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
return false;
const MachineFrameInfo &MFI = MF.getFrameInfo();
- const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
uint64_t NumBytes = AFI->getLocalStackSize();
// If neither NEON or SVE are available, a COPY from one Q-reg to
@@ -1251,9 +1260,17 @@ AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
isTargetWindows(MF) && AFI->getSVECalleeSavedStackSize();
if (MFI.isScalableStackID(FI)) {
if (FPAfterSVECalleeSaves &&
- -ObjectOffset <= (int64_t)AFI->getSVECalleeSavedStackSize())
+ -ObjectOffset <= (int64_t)AFI->getSVECalleeSavedStackSize()) {
+ assert(!AFI->hasSplitSVEObjects() &&
+ "split-sve-objects not supported with FPAfterSVECalleeSaves");
return StackOffset::getScalable(ObjectOffset);
- return StackOffset::get(-((int64_t)AFI->getCalleeSavedStackSize()),
+ }
+ StackOffset AccessOffset{};
+ if (AFI->hasSplitSVEObjects() &&
+ MFI.getStackID(FI) == TargetStackID::ScalableVector)
+ AccessOffset = -PPRStackSize;
+ return AccessOffset +
+ StackOffset::get(-((int64_t)AFI->getCalleeSavedStackSize()),
ObjectOffset);
}
@@ -1317,12 +1334,12 @@ StackOffset AArch64FrameLowering::resolveFrameIndexReference(
bool isFixed = MFI.isFixedObjectIndex(FI);
bool isSVE = MFI.isScalableStackID(FI);
return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, isSVE, FrameReg,
- PreferFP, ForSimm);
+ PreferFP, ForSimm, FI);
}
StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
const MachineFunction &MF, int64_t ObjectOffset, bool isFixed, bool isSVE,
- Register &FrameReg, bool PreferFP, bool ForSimm) const {
+ Register &FrameReg, bool PreferFP, bool ForSimm, int64_t FI) const {
const auto &MFI = MF.getFrameInfo();
const auto *RegInfo = static_cast<const AArch64RegisterInfo *>(
MF.getSubtarget().getRegisterInfo());
@@ -1334,7 +1351,9 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
bool isCSR =
!isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize(MFI));
- const StackOffset SVEStackSize = getSVEStackSize(MF);
+ StackOffset ZPRStackSize = getZPRStackSize(MF);
+ StackOffset PPRStackSize = getPPRStackSize(MF);
+ StackOffset SVEStackSize = ZPRStackSize + PPRStackSize;
// Use frame pointer to reference fixed objects. Use it for locals if
// there are VLAs or a dynamically realigned SP (and thus the SP isn't
@@ -1409,10 +1428,17 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
isTargetWindows(MF) && AFI->getSVECalleeSavedStackSize();
if (isSVE) {
+ StackOffset AccessOffset{};
+ if (AFI->hasSplitSVEObjects() &&
+ MFI.getStackID(FI) == TargetStackID::ScalableVector)
+ AccessOffset = -PPRStackSize;
+
StackOffset FPOffset =
- StackOffset::get(-AFI->getCalleeSaveBaseToFrameRecordOffset(), ObjectOffset);
+ AccessOffset +
+ StackOffset::get(-AFI->getCalleeSaveBaseToFrameRecordOffset(),
+ ObjectOffset);
StackOffset SPOffset =
- SVEStackSize +
+ SVEStackSize + AccessOffset +
StackOffset::get(MFI.getStackSize() - AFI->getCalleeSavedStackSize(),
ObjectOffset);
if (FPAfterSVECalleeSaves) {
@@ -1422,6 +1448,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
SPOffset += StackOffset::getFixed(AFI->getCalleeSavedStackSize());
}
}
+
// Always use the FP for SVE spills if available and beneficial.
if (hasFP(MF) && (SPOffset.getFixed() ||
FPOffset.getScalable() < SPOffset.getScalable() ||
@@ -1429,13 +1456,13 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
FrameReg = RegInfo->getFrameRegister(MF);
return FPOffset;
}
-
FrameReg = RegInfo->hasBasePointer(MF) ? RegInfo->getBaseRegister()
: (unsigned)AArch64::SP;
+
return SPOffset;
}
- StackOffset ScalableOffset = {};
+ StackOffset SVEAreaOffset = {};
if (FPAfterSVECalleeSaves) {
// In this stack layout, the FP is in between the callee saves and other
// SVE allocations.
@@ -1443,25 +1470,25 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
StackOffset::getScalable(AFI->getSVECalleeSavedStackSize());
if (UseFP) {
if (isFixed)
- ScalableOffset = SVECalleeSavedStack;
+ SVEAreaOffset = SVECalleeSavedStack;
else if (!isCSR)
- ScalableOffset = SVECalleeSavedStack - SVEStackSize;
+ SVEAreaOffset = SVECalleeSavedStack - SVEStackSize;
} else {
if (isFixed)
- ScalableOffset = SVEStackSize;
+ SVEAreaOffset = SVEStackSize;
else if (isCSR)
- ScalableOffset = SVEStackSize - SVECalleeSavedStack;
+ SVEAreaOffset = SVEStackSize - SVECalleeSavedStack;
}
} else {
if (UseFP && !(isFixed || isCSR))
- ScalableOffset = -SVEStackSize;
+ SVEAreaOffset = -SVEStackSize;
if (!UseFP && (isFixed || isCSR))
- ScalableOffset = SVEStackSize;
+ SVEAreaOffset = SVEStackSize;
}
if (UseFP) {
FrameReg = RegInfo->getFrameRegister(MF);
- return StackOffset::getFixed(FPOffset) + ScalableOffset;
+ return StackOffset::getFixed(FPOffset) + SVEAreaOffset;
}
// Use the base pointer if we have one.
@@ -1478,7 +1505,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
Offset -= AFI->getLocalStackSize();
}
- return StackOffset::getFixed(Offset) + ScalableOffset;
+ return StackOffset::getFixed(Offset) + SVEAreaOffset;
}
static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
@@ -1635,14 +1662,25 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
RegInc = -1;
FirstReg = Count - 1;
}
+
bool FPAfterSVECalleeSaves = IsWindows && AFI->getSVECalleeSavedStackSize();
- int ScalableByteOffset = FPAfterSVECalleeSaves
- ? 0
- : AFI->getZPRCalleeSavedStackSize() +
- AFI->getPPRCalleeSavedStackSize();
+
+ int ZPRByteOffset = 0;
+ int PPRByteOffset = 0;
+ bool SplitPPRs = AFI->hasSplitSVEObjects();
+ if (SplitPPRs) {
+ ZPRByteOffset = AFI->getZPRCalleeSavedStackSize();
+ PPRByteOffset = AFI->getPPRCalleeSavedStackSize();
+ } else if (!FPAfterSVECalleeSaves) {
+ ZPRByteOffset =
+ AFI->getZPRCalleeSavedStackSize() + AFI->getPPRCalleeSavedStackSize();
+ // Unused: Everything goes in ZPR space.
+ PPRByteOffset = 0;
+ }
+
bool NeedGapToAlignStack = AFI->hasCalleeSaveStackFreeSpace();
Register LastReg = 0;
- bool HasCSHazardPadding = AFI->hasStackHazardSlotIndex();
+ bool HasCSHazardPadding = AFI->hasStackHazardSlotIndex() && !SplitPPRs;
// When iterating backwards, the loop condition relies on unsigned wraparound.
for (unsigned i = FirstReg; i < Count; i += RegInc) {
@@ -1671,6 +1709,10 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
llvm_unreachable("Unsupported register class.");
}
+ int &ScalableByteOffset = RPI.Type == RegPairInfo::PPR && SplitPPRs
+ ? PPRByteOffset
+ : ZPRByteOffset;
+
// Add the stack hazard size as we transition from GPR->FPR CSRs.
if (HasCSHazardPadding &&
(!LastReg || !AArch64InstrInfo::isFpOrNEON(LastReg)) &&
@@ -2227,6 +2269,13 @@ static std::optional<int> getLdStFrameID(const MachineInstr &MI,
return getMMOFrameID(*MI.memoperands_begin(), MFI);
}
+// Returns true if the LDST MachineInstr \p MI is a PPR access.
+static bool isPPRAccess(const MachineInstr &MI) {
+ return MI.getOpcode() != AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO &&
+ MI.getOpcode() != AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO &&
+ AArch64::PPRRegClass.contains(MI.getOperand(0).getReg());
+}
+
// Check if a Hazard slot is needed for the current function, and if so create
// one for it. The index is stored in AArch64FunctionInfo->StackHazardSlotIndex,
// which can be used to determine if any hazard padding is needed.
@@ -2250,25 +2299,50 @@ void AArch64FrameLowering::determineStackHazardSlot(
bool HasFPRCSRs = any_of(SavedRegs.set_bits(), [](unsigned Reg) {
return AArch64::FPR64RegClass.contains(Reg) ||
AArch64::FPR128RegClass.contains(Reg) ||
- AArch64::ZPRRegClass.contains(Reg) ||
- AArch64::PPRRegClass.contains(Reg);
+ AArch64::ZPRRegClass.contains(Reg);
+ });
+ bool HasPPRCSRs = any_of(SavedRegs.set_bits(), [](unsigned Reg) {
+ return AArch64::PPRRegClass.contains(Reg);
});
bool HasFPRStackObjects = false;
- if (!HasFPRCSRs) {
- std::vector<unsigned> FrameObjects(MFI.getObjectIndexEnd());
+ bool HasPPRStackObjects = false;
+ if (!HasFPRCSRs || SplitSVEObjects) {
+ enum SlotType : uint8_t {
+ Unknown = 0,
+ ZPRorFPR = 1 << 0,
+ PPR = 1 << 1,
+ GPR = 1 << 2,
+ LLVM_MARK_AS_BITMASK_ENUM(GPR)
+ };
+
+ // Find stack slots solely used for one kind of register (ZPR, PPR, etc.),
+ // based on the kinds of accesses used in the function.
+ SmallVector<SlotType> SlotTypes(MFI.getObjectIndexEnd(), SlotType::Unknown);
for (auto &MBB : MF) {
for (auto &MI : MBB) {
std::optional<int> FI = getLdStFrameID(MI, MFI);
- if (FI && *FI >= 0 && *FI < (int)FrameObjects.size()) {
- if (MFI.isScalableStackID(*FI) || AArch64InstrInfo::isFpOrNEON(MI))
- FrameObjects[*FI] |= 2;
- else
- FrameObjects[*FI] |= 1;
+ if (!FI || FI < 0 || FI > int(SlotTypes.size()))
+ continue;
+ if (MFI.isScalableStackID(*FI)) {
+ SlotTypes[*FI] |=
+ isPPRAccess(MI) ? SlotType::PPR : SlotType::ZPRorFPR;
+ } else {
+ SlotTypes[*FI] |= AArch64InstrInfo::isFpOrNEON(MI)
+ ? SlotType::ZPRorFPR
+ : SlotType::GPR;
}
}
}
- HasFPRStackObjects =
- any_of(FrameObjects, [](unsigned B) { return (B & 3) == 2; });
+
+ for (int FI = 0; FI < int(SlotTypes.size()); ++FI) {
+ HasFPRStackObjects |= SlotTypes[FI] == SlotType::ZPRorFPR;
+ // For SplitSVEObjects remember that this stack slot is a predicate, this
+ // will be needed later when determining the frame layout.
+ if (SlotTypes[FI] == SlotType::PPR) {
+ MFI.setStackID(FI, TargetStackID::ScalablePredicateVector);
+ HasPPRStackObjects = true;
+ }
+ }
}
if (HasFPRCSRs || HasFPRStackObjects) {
@@ -2277,6 +2351,70 @@ void AArch64FrameLowering::determineStackHazardSlot(
<< StackHazardSize << "\n");
AFI->setStackHazardSlotIndex(ID);
}
+
+ // Determine if we should use SplitSVEObjects. This should only be used if
+ // there's a possibility of a stack hazard between PPRs and ZPRs or FPRs.
+ if (SplitSVEObjects) {
+ if (!HasPPRCSRs && !HasPPRStackObjects) {
+ LLVM_DEBUG(
+ dbgs() << "Not using SplitSVEObjects as no PPRs are on the stack\n");
+ return;
+ }
+
+ if (!HasFPRCSRs && !HasFPRStackObjects) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Not using SplitSVEObjects as no FPRs or ZPRs are on the stack\n");
+ return;
+ }
+
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ if (MFI.hasVarSizedObjects() || TRI->hasStackRealignment(MF)) {
+ LLVM_DEBUG(dbgs() << "SplitSVEObjects is not supported with variable "
+ "sized objects or realignment\n");
+ return;
+ }
+
+ if (arePPRsSpilledAsZPR(MF)) {
+ LLVM_DEBUG(dbgs() << "SplitSVEObjects is not supported with "
+ "-aarch64-enable-zpr-predicate-spills");
+ return;
+ }
+
+ [[maybe_unused]] const AArch64Subtarget &Subtarget =
+ MF.getSubtarget<AArch64Subtarget>();
+ assert(Subtarget.isSVEorStreamingSVEAvailable() &&
+ "Expected SVE to be available for PPRs");
+
+ // With SplitSVEObjects the CS hazard padding is placed between the
+ // PPRs and ZPRs. If there are any FPR CS there would be a hazard between
+ // them and the CS GRPs. Avoid this by promoting all FPR CS to ZPRs.
+ BitVector FPRZRegs(SavedRegs.size());
+ for (size_t Reg = 0, E = SavedRegs.size(); HasFPRCSRs && Reg < E; ++Reg) {
+ BitVector::reference RegBit = SavedRegs[Reg];
+ if (!RegBit)
+ continue;
+ unsigned SubRegIdx = 0;
+ if (AArch64::FPR64RegClass.contains(Reg))
+ SubRegIdx = AArch64::dsub;
+ else if (AArch64::FPR128RegClass.contains(Reg))
+ SubRegIdx = AArch64::zsub; // TODO: Is the the right sub-register?
+ else
+ continue;
+ // Clear the bit for the FPR save.
+ RegBit = false;
+ // Mark that we should save the corresponding ZPR.
+ Register ZReg =
+ TRI->getMatchingSuperReg(Reg, SubRegIdx, &AArch64::ZPRRegClass);
+ FPRZRegs.set(ZReg);
+ }
+ SavedRegs |= FPRZRegs;
+
+ // FIXME: Avoid setting setting the CC. Since we've replaced FPRs with ZPRs
+ // we need to set this or later in PEI the ZPR CS will be masked out.
+ AFI->setIsSVECC(true);
+ AFI->setSplitSVEObjects(true);
+ }
}
void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
@@ -2410,6 +2548,11 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
SavedRegs.set(AArch64::X18);
}
+ // Determine if a Hazard slot should be used and where it should go.
+ // If SplitSVEObjects is used, the hazard padding is placed between the PPRs
+ // and ZPRs. Otherwise, it goes in the callee save area.
+ determineStackHazardSlot(MF, SavedRegs);
+
// Calculates the callee saved stack size.
unsigned CSStackSize = 0;
unsigned ZPRCSStackSize = 0;
@@ -2434,10 +2577,8 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// only 64-bit GPRs can be added to SavedRegs.
unsigned NumSavedRegs = SavedRegs.count();
- // Determine if a Hazard slot should be used, and increase the CSStackSize by
- // StackHazardSize if so.
- determineStackHazardSlot(MF, SavedRegs);
- if (AFI->hasStackHazardSlotIndex())
+ // If we have hazard padding in the CS area add that to the size.
+ if (AFI->hasStackHazardSlotIndex() && !AFI->hasSplitSVEObjects())
CSStackSize += getStackHazardSize(MF);
// Increase the callee-saved stack size if the function has streaming mode
@@ -2607,7 +2748,7 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
// Create a hazard slot as we switch between GPR and FPR CSRs.
- if (AFI->hasStackHazardSlotIndex() &&
+ if (AFI->hasStackHazardSlotIndex() && !AFI->hasSplitSVEObjects() &&
(!LastReg || !AArch64InstrInfo::isFpOrNEON(LastReg)) &&
AArch64InstrInfo::isFpOrNEON(Reg)) {
assert(HazardSlotIndex == std::numeric_limits<int>::max() &&
@@ -2646,7 +2787,7 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
}
// Add hazard slot in the case where no FPR CSRs are present.
- if (AFI->hasStackHazardSlotIndex() &&
+ if (AFI->hasStackHazardSlotIndex() && !AFI->hasSplitSVEObjects() &&
HazardSlotIndex == std::numeric_limits<int>::max()) {
HazardSlotIndex = MFI.CreateStackObject(StackHazardSize, Align(8), true);
LLVM_DEBUG(dbgs() << "Created CSR Hazard at slot " << HazardSlotIndex
@@ -2701,8 +2842,7 @@ static bool getSVECalleeSaveSlotRange(const MachineFrameInfo &MFI,
}
static SVEStackSizes determineSVEStackSizes(MachineFunction &MF,
- AssignObjectOffsets AssignOffsets,
- bool SplitSVEObjects) {
+ AssignObjectOffsets AssignOffsets) {
MachineFrameInfo &MFI = MF.getFrameInfo();
auto *AFI = MF.getInfo<AArch64FunctionInfo>();
@@ -2713,7 +2853,7 @@ static SVEStackSizes determineSVEStackSizes(MachineFunction &MF,
// are included in the SVE vector area.
uint64_t &ZPRStackTop = SVEStack.ZPRStackSize;
uint64_t &PPRStackTop =
- SplitSVEObjects ? SVEStack.PPRStackSize : SVEStack.ZPRStackSize;
+ AFI->hasSplitSVEObjects() ? SVEStack.PPRStackSize : SVEStack.ZPRStackSize;
#ifndef NDEBUG
// First process all fixed stack objects.
@@ -3394,7 +3534,7 @@ void TagStoreEdit::emitCode(MachineBasicBlock::iterator &InsertI,
Register Reg;
FrameRegOffset = TFI->resolveFrameOffsetReference(
*MF, FirstTagStore.Offset, false /*isFixed*/, false /*isSVE*/, Reg,
- /*PreferFP=*/false, /*ForSimm=*/true);
+ /*PreferFP=*/false, /*ForSimm=*/true, /*FI=*/-1);
FrameReg = Reg;
FrameRegUpdate = std::nullopt;
@@ -3733,10 +3873,12 @@ bool FrameObjectCompare(const FrameObject &A, const FrameObject &B) {
void AArch64FrameLowering::orderFrameObjects(
const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
- if (!OrderFrameObjects || ObjectsToAllocate.empty())
+ const AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
+
+ if ((!OrderFrameObjects && !AFI.hasSplitSVEObjects()) ||
+ ObjectsToAllocate.empty())
return;
- const AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
const MachineFrameInfo &MFI = MF.getFrameInfo();
std::vector<FrameObject> FrameObjects(MFI.getObjectIndexEnd());
for (auto &Obj : ObjectsToAllocate) {
@@ -3755,7 +3897,8 @@ void AArch64FrameLowering::orderFrameObjects(
if (AFI.hasStackHazardSlotIndex()) {
std::optional<int> FI = getLdStFrameID(MI, MFI);
if (FI && *FI >= 0 && *FI < (int)FrameObjects.size()) {
- if (MFI.isScalableStackID(*FI) || AArch64InstrInfo::isFpOrNEON(MI))
+ if (MFI.getStackID(*FI) == TargetStackID::ScalableVector ||
+ AArch64InstrInfo::isFpOrNEON(MI))
FrameObjects[*FI].Accesses |= FrameObject::AccessFPR;
else
FrameObjects[*FI].Accesses |= FrameObject::AccessGPR;
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index 38aa28b1..675773c 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -70,7 +70,8 @@ public:
StackOffset resolveFrameOffsetReference(const MachineFunction &MF,
int64_t ObjectOffset, bool isFixed,
bool isSVE, Register &FrameReg,
- bool PreferFP, bool ForSimm) const;
+ bool PreferFP, bool ForSimm,
+ int64_t FI) const;
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
ArrayRef<CalleeSavedInfo> CSI,
@@ -155,7 +156,8 @@ public:
/// Returns the size of the entire ZPR stackframe (calleesaves + spills).
StackOffset getZPRStackSize(const MachineFunction &MF) const;
- /// Returns the size of the entire PPR stackframe (calleesaves + spills).
+ /// Returns the size of the entire PPR stackframe (calleesaves + spills +
+ /// hazard padding).
StackOffset getPPRStackSize(const MachineFunction &MF) const;
/// Returns the size of the entire SVE stackframe (PPRs + ZPRs).
diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index 4a79d9c..096e180 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -137,6 +137,10 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
uint64_t StackSizeZPR = 0;
uint64_t StackSizePPR = 0;
+ /// Are SVE objects (vectors and predicates) split into separate regions on
+ /// the stack.
+ bool SplitSVEObjects = false;
+
/// HasCalculatedStackSizeSVE indicates whether StackSizeZPR/PPR is valid.
bool HasCalculatedStackSizeSVE = false;
@@ -336,7 +340,6 @@ public:
bool isStackRealigned() const { return StackRealigned; }
void setStackRealigned(bool s) { StackRealigned = s; }
-
bool hasCalleeSaveStackFreeSpace() const {
return CalleeSaveStackHasFreeSpace;
}
@@ -481,6 +484,9 @@ public:
StackHazardCSRSlotIndex = Index;
}
+ bool hasSplitSVEObjects() const { return SplitSVEObjects; }
+ void setSplitSVEObjects(bool s) { SplitSVEObjects = s; }
+
SMEAttrs getSMEFnAttrs() const { return SMEFnAttrs; }
unsigned getSRetReturnReg() const { return SRetReturnReg; }
diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
index 3de0d4d..a60548b 100644
--- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
@@ -708,8 +708,6 @@ void AArch64PrologueEmitter::emitPrologue() {
if (AFL.windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding))
emitWindowsStackProbe(AfterGPRSavesI, DL, NumBytes, RealignmentPadding);
- MachineBasicBlock::iterator CalleeSavesEnd = AfterGPRSavesI;
-
StackOffset PPRCalleeSavesSize =
StackOffset::getScalable(AFI->getPPRCalleeSavedStackSize());
StackOffset ZPRCalleeSavesSize =
@@ -717,76 +715,97 @@ void AArch64PrologueEmitter::emitPrologue() {
StackOffset SVECalleeSavesSize = PPRCalleeSavesSize + ZPRCalleeSavesSize;
StackOffset PPRLocalsSize = AFL.getPPRStackSize(MF) - PPRCalleeSavesSize;
StackOffset ZPRLocalsSize = AFL.getZPRStackSize(MF) - ZPRCalleeSavesSize;
+ std::optional<MachineBasicBlock::iterator> ZPRCalleeSavesBegin,
+ ZPRCalleeSavesEnd, PPRCalleeSavesBegin, PPRCalleeSavesEnd;
StackOffset CFAOffset =
StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes);
MachineBasicBlock::iterator AfterSVESavesI = AfterGPRSavesI;
-
if (!FPAfterSVECalleeSaves) {
- MachineBasicBlock::iterator ZPRCalleeSavesBegin = AfterGPRSavesI,
- ZPRCalleeSavesEnd = AfterGPRSavesI;
- MachineBasicBlock::iterator PPRCalleeSavesBegin = AfterGPRSavesI,
- PPRCalleeSavesEnd = AfterGPRSavesI;
-
- // Process the SVE callee-saves to determine what space needs to be
- // allocated.
-
+ // Process the SVE callee-saves to find the starts/ends of the ZPR and PPR
+ // areas.
+ PPRCalleeSavesBegin = AfterGPRSavesI;
if (PPRCalleeSavesSize) {
LLVM_DEBUG(dbgs() << "PPRCalleeSavedStackSize = "
<< PPRCalleeSavesSize.getScalable() << "\n");
- PPRCalleeSavesBegin = AfterSVESavesI;
- assert(isPartOfPPRCalleeSaves(PPRCalleeSavesBegin) &&
+ assert(isPartOfPPRCalleeSaves(*PPRCalleeSavesBegin) &&
"Unexpected instruction");
while (isPartOfPPRCalleeSaves(AfterSVESavesI) &&
AfterSVESavesI != MBB.getFirstTerminator())
++AfterSVESavesI;
- PPRCalleeSavesEnd = AfterSVESavesI;
}
-
+ PPRCalleeSavesEnd = ZPRCalleeSavesBegin = AfterSVESavesI;
if (ZPRCalleeSavesSize) {
LLVM_DEBUG(dbgs() << "ZPRCalleeSavedStackSize = "
<< ZPRCalleeSavesSize.getScalable() << "\n");
- ZPRCalleeSavesBegin = AfterSVESavesI;
- assert(isPartOfZPRCalleeSaves(ZPRCalleeSavesBegin) &&
+ assert(isPartOfZPRCalleeSaves(*ZPRCalleeSavesBegin) &&
"Unexpected instruction");
while (isPartOfZPRCalleeSaves(AfterSVESavesI) &&
AfterSVESavesI != MBB.getFirstTerminator())
++AfterSVESavesI;
- ZPRCalleeSavesEnd = AfterSVESavesI;
}
-
- // Allocate space for the callee saves (if any).
- StackOffset LocalsSize =
- PPRLocalsSize + ZPRLocalsSize + StackOffset::getFixed(NumBytes);
- MachineBasicBlock::iterator CalleeSavesBegin =
- AFI->getPPRCalleeSavedStackSize() ? PPRCalleeSavesBegin
- : ZPRCalleeSavesBegin;
- allocateStackSpace(CalleeSavesBegin, 0, SVECalleeSavesSize,
- EmitAsyncCFI && !HasFP, CFAOffset,
- MFI.hasVarSizedObjects() || LocalsSize);
-
- CalleeSavesEnd = AFI->getZPRCalleeSavedStackSize() ? ZPRCalleeSavesEnd
- : PPRCalleeSavesEnd;
+ ZPRCalleeSavesEnd = AfterSVESavesI;
}
- CFAOffset += SVECalleeSavesSize;
if (EmitAsyncCFI)
- emitCalleeSavedSVELocations(CalleeSavesEnd);
-
- // Allocate space for the rest of the frame including SVE locals. Align the
- // stack as necessary.
- assert(!(AFL.canUseRedZone(MF) && NeedsRealignment) &&
- "Cannot use redzone with stack realignment");
- if (!AFL.canUseRedZone(MF)) {
- // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
- // the correct value here, as NumBytes also includes padding bytes,
- // which shouldn't be counted here.
- StackOffset SVELocalsSize = PPRLocalsSize + ZPRLocalsSize;
- allocateStackSpace(CalleeSavesEnd, RealignmentPadding,
- SVELocalsSize + StackOffset::getFixed(NumBytes),
+ emitCalleeSavedSVELocations(AfterSVESavesI);
+
+ if (AFI->hasSplitSVEObjects()) {
+ assert(!FPAfterSVECalleeSaves &&
+ "Cannot use FPAfterSVECalleeSaves with aarch64-split-sve-objects");
+ assert(!AFL.canUseRedZone(MF) &&
+ "Cannot use redzone with aarch64-split-sve-objects");
+ // TODO: Handle HasWinCFI/NeedsWinCFI?
+ assert(!NeedsWinCFI &&
+ "WinCFI with aarch64-split-sve-objects is not supported");
+
+ // Split ZPR and PPR allocation.
+ // Allocate PPR callee saves
+ allocateStackSpace(*PPRCalleeSavesBegin, 0, PPRCalleeSavesSize,
+ EmitAsyncCFI && !HasFP, CFAOffset,
+ MFI.hasVarSizedObjects() || ZPRCalleeSavesSize ||
+ ZPRLocalsSize || PPRLocalsSize);
+ CFAOffset += PPRCalleeSavesSize;
+
+ // Allocate PPR locals + ZPR callee saves
+ assert(PPRCalleeSavesEnd == ZPRCalleeSavesBegin &&
+ "Expected ZPR callee saves after PPR locals");
+ allocateStackSpace(*PPRCalleeSavesEnd, RealignmentPadding,
+ PPRLocalsSize + ZPRCalleeSavesSize,
+ EmitAsyncCFI && !HasFP, CFAOffset,
+ MFI.hasVarSizedObjects() || ZPRLocalsSize);
+ CFAOffset += PPRLocalsSize + ZPRCalleeSavesSize;
+
+ // Allocate ZPR locals
+ allocateStackSpace(*ZPRCalleeSavesEnd, RealignmentPadding,
+ ZPRLocalsSize + StackOffset::getFixed(NumBytes),
EmitAsyncCFI && !HasFP, CFAOffset,
MFI.hasVarSizedObjects());
+ } else {
+ // Allocate space for the callee saves (if any).
+ StackOffset LocalsSize =
+ PPRLocalsSize + ZPRLocalsSize + StackOffset::getFixed(NumBytes);
+ if (!FPAfterSVECalleeSaves)
+ allocateStackSpace(AfterGPRSavesI, 0, SVECalleeSavesSize,
+ EmitAsyncCFI && !HasFP, CFAOffset,
+ MFI.hasVarSizedObjects() || LocalsSize);
+ CFAOffset += SVECalleeSavesSize;
+
+ // Allocate space for the rest of the frame including SVE locals. Align the
+ // stack as necessary.
+ assert(!(AFL.canUseRedZone(MF) && NeedsRealignment) &&
+ "Cannot use redzone with stack realignment");
+ if (!AFL.canUseRedZone(MF)) {
+ // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
+ // the correct value here, as NumBytes also includes padding bytes,
+ // which shouldn't be counted here.
+ StackOffset SVELocalsSize = PPRLocalsSize + ZPRLocalsSize;
+ allocateStackSpace(AfterSVESavesI, RealignmentPadding,
+ SVELocalsSize + StackOffset::getFixed(NumBytes),
+ EmitAsyncCFI && !HasFP, CFAOffset,
+ MFI.hasVarSizedObjects());
+ }
}
// If we need a base pointer, set it up here. It's whatever the value of the
@@ -1220,6 +1239,7 @@ void AArch64PrologueEmitter::emitCalleeSavedSVELocations(
return;
CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
+ StackOffset PPRStackSize = AFL.getPPRStackSize(MF);
std::optional<int64_t> IncomingVGOffsetFromDefCFA;
if (AFL.requiresSaveVG(MF)) {
@@ -1230,7 +1250,8 @@ void AArch64PrologueEmitter::emitCalleeSavedSVELocations(
}
for (const auto &Info : CSI) {
- if (!MFI.isScalableStackID(Info.getFrameIdx()))
+ int FI = Info.getFrameIdx();
+ if (!MFI.isScalableStackID(FI))
continue;
// Not all unwinders may know about SVE registers, so assume the lowest
@@ -1241,9 +1262,13 @@ void AArch64PrologueEmitter::emitCalleeSavedSVELocations(
continue;
StackOffset Offset =
- StackOffset::getScalable(MFI.getObjectOffset(Info.getFrameIdx())) -
+ StackOffset::getScalable(MFI.getObjectOffset(FI)) -
StackOffset::getFixed(AFI->getCalleeSavedStackSize(MFI));
+ if (AFI->hasSplitSVEObjects() &&
+ MFI.getStackID(FI) == TargetStackID::ScalableVector)
+ Offset -= PPRStackSize;
+
CFIBuilder.insertCFIInst(
createCFAOffset(RegInfo, Reg, Offset, IncomingVGOffsetFromDefCFA));
}
@@ -1384,7 +1409,9 @@ void AArch64EpilogueEmitter::emitEpilogue() {
if (HasFP && AFI->hasSwiftAsyncContext())
emitSwiftAsyncContextFramePointer(EpilogueEndI, DL);
- StackOffset SVEStackSize = AFL.getSVEStackSize(MF);
+ StackOffset ZPRStackSize = AFL.getZPRStackSize(MF);
+ StackOffset PPRStackSize = AFL.getPPRStackSize(MF);
+ StackOffset SVEStackSize = ZPRStackSize + PPRStackSize;
// If there is a single SP update, insert it before the ret and we're done.
if (CombineSPBump) {
@@ -1405,111 +1432,186 @@ void AArch64EpilogueEmitter::emitEpilogue() {
NumBytes -= PrologueSaveSize;
assert(NumBytes >= 0 && "Negative stack allocation size!?");
- // Process the SVE callee-saves to determine what space needs to be
- // deallocated.
- StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
- MachineBasicBlock::iterator RestoreBegin = FirstGPRRestoreI,
- RestoreEnd = FirstGPRRestoreI;
- int64_t ZPRCalleeSavedSize = AFI->getZPRCalleeSavedStackSize();
- int64_t PPRCalleeSavedSize = AFI->getPPRCalleeSavedStackSize();
- int64_t SVECalleeSavedSize = ZPRCalleeSavedSize + PPRCalleeSavedSize;
-
- if (SVECalleeSavedSize) {
- if (FPAfterSVECalleeSaves)
- RestoreEnd = MBB.getFirstTerminator();
-
- RestoreBegin = std::prev(RestoreEnd);
- while (RestoreBegin != MBB.begin() &&
- isPartOfSVECalleeSaves(std::prev(RestoreBegin)))
- --RestoreBegin;
-
- assert(isPartOfSVECalleeSaves(RestoreBegin) &&
- isPartOfSVECalleeSaves(std::prev(RestoreEnd)) &&
- "Unexpected instruction");
-
- StackOffset CalleeSavedSizeAsOffset =
- StackOffset::getScalable(SVECalleeSavedSize);
- DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
- DeallocateAfter = CalleeSavedSizeAsOffset;
- }
+ if (!AFI->hasSplitSVEObjects()) {
+ // Process the SVE callee-saves to determine what space needs to be
+ // deallocated.
+ StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
+ MachineBasicBlock::iterator RestoreBegin = FirstGPRRestoreI,
+ RestoreEnd = FirstGPRRestoreI;
+ int64_t ZPRCalleeSavedSize = AFI->getZPRCalleeSavedStackSize();
+ int64_t PPRCalleeSavedSize = AFI->getPPRCalleeSavedStackSize();
+ int64_t SVECalleeSavedSize = ZPRCalleeSavedSize + PPRCalleeSavedSize;
+
+ if (SVECalleeSavedSize) {
+ if (FPAfterSVECalleeSaves)
+ RestoreEnd = MBB.getFirstTerminator();
+
+ RestoreBegin = std::prev(RestoreEnd);
+ while (RestoreBegin != MBB.begin() &&
+ isPartOfSVECalleeSaves(std::prev(RestoreBegin)))
+ --RestoreBegin;
+
+ assert(isPartOfSVECalleeSaves(RestoreBegin) &&
+ isPartOfSVECalleeSaves(std::prev(RestoreEnd)) &&
+ "Unexpected instruction");
- // Deallocate the SVE area.
- if (FPAfterSVECalleeSaves) {
- // If the callee-save area is before FP, restoring the FP implicitly
- // deallocates non-callee-save SVE allocations. Otherwise, deallocate
- // them explicitly.
- if (!AFI->isStackRealigned() && !MFI.hasVarSizedObjects()) {
- emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
- DeallocateBefore, TII, MachineInstr::FrameDestroy, false,
- NeedsWinCFI, &HasWinCFI);
+ StackOffset CalleeSavedSizeAsOffset =
+ StackOffset::getScalable(SVECalleeSavedSize);
+ DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
+ DeallocateAfter = CalleeSavedSizeAsOffset;
}
- // Deallocate callee-save non-SVE registers.
- emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
- StackOffset::getFixed(AFI->getCalleeSavedStackSize()), TII,
- MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
-
- // Deallocate fixed objects.
- emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
- StackOffset::getFixed(FixedObject), TII,
- MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
-
- // Deallocate callee-save SVE registers.
- emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
- DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
- NeedsWinCFI, &HasWinCFI);
- } else if (SVEStackSize) {
- int64_t SVECalleeSavedSize = AFI->getSVECalleeSavedStackSize();
- // If we have stack realignment or variable-sized objects we must use the
- // FP to restore SVE callee saves (as there is an unknown amount of
- // data/padding between the SP and SVE CS area).
- Register BaseForSVEDealloc =
- (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) ? AArch64::FP
- : AArch64::SP;
- if (SVECalleeSavedSize && BaseForSVEDealloc == AArch64::FP) {
- Register CalleeSaveBase = AArch64::FP;
- if (int64_t CalleeSaveBaseOffset =
- AFI->getCalleeSaveBaseToFrameRecordOffset()) {
- // If we have have an non-zero offset to the non-SVE CS base we need to
- // compute the base address by subtracting the offest in a temporary
- // register first (to avoid briefly deallocating the SVE CS).
- CalleeSaveBase =
- MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
- emitFrameOffset(MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP,
- StackOffset::getFixed(-CalleeSaveBaseOffset), TII,
- MachineInstr::FrameDestroy);
- }
- // The code below will deallocate the stack space space by moving the
- // SP to the start of the SVE callee-save area.
- emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,
- StackOffset::getScalable(-SVECalleeSavedSize), TII,
- MachineInstr::FrameDestroy);
- } else if (BaseForSVEDealloc == AArch64::SP) {
- if (SVECalleeSavedSize) {
- // Deallocate the non-SVE locals first before we can deallocate (and
- // restore callee saves) from the SVE area.
- emitFrameOffset(
- MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
- StackOffset::getFixed(NumBytes), TII, MachineInstr::FrameDestroy,
- false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
- SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize));
- NumBytes = 0;
+ // Deallocate the SVE area.
+ if (FPAfterSVECalleeSaves) {
+ // If the callee-save area is before FP, restoring the FP implicitly
+ // deallocates non-callee-save SVE allocations. Otherwise, deallocate
+ // them explicitly.
+ if (!AFI->isStackRealigned() && !MFI.hasVarSizedObjects()) {
+ emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
+ DeallocateBefore, TII, MachineInstr::FrameDestroy,
+ false, NeedsWinCFI, &HasWinCFI);
}
+ // Deallocate callee-save non-SVE registers.
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
- DeallocateBefore, TII, MachineInstr::FrameDestroy, false,
- NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
- SVEStackSize +
- StackOffset::getFixed(NumBytes + PrologueSaveSize));
+ StackOffset::getFixed(AFI->getCalleeSavedStackSize()),
+ TII, MachineInstr::FrameDestroy, false, NeedsWinCFI,
+ &HasWinCFI);
+
+ // Deallocate fixed objects.
+ emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
+ StackOffset::getFixed(FixedObject), TII,
+ MachineInstr::FrameDestroy, false, NeedsWinCFI,
+ &HasWinCFI);
+ // Deallocate callee-save SVE registers.
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
- NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
- DeallocateAfter +
- StackOffset::getFixed(NumBytes + PrologueSaveSize));
+ NeedsWinCFI, &HasWinCFI);
+ } else if (SVEStackSize) {
+ int64_t SVECalleeSavedSize = AFI->getSVECalleeSavedStackSize();
+ // If we have stack realignment or variable-sized objects we must use the
+ // FP to restore SVE callee saves (as there is an unknown amount of
+ // data/padding between the SP and SVE CS area).
+ Register BaseForSVEDealloc =
+ (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) ? AArch64::FP
+ : AArch64::SP;
+ if (SVECalleeSavedSize && BaseForSVEDealloc == AArch64::FP) {
+ Register CalleeSaveBase = AArch64::FP;
+ if (int64_t CalleeSaveBaseOffset =
+ AFI->getCalleeSaveBaseToFrameRecordOffset()) {
+ // If we have have an non-zero offset to the non-SVE CS base we need
+ // to compute the base address by subtracting the offest in a
+ // temporary register first (to avoid briefly deallocating the SVE
+ // CS).
+ CalleeSaveBase = MBB.getParent()->getRegInfo().createVirtualRegister(
+ &AArch64::GPR64RegClass);
+ emitFrameOffset(MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP,
+ StackOffset::getFixed(-CalleeSaveBaseOffset), TII,
+ MachineInstr::FrameDestroy);
+ }
+ // The code below will deallocate the stack space space by moving the
+ // SP to the start of the SVE callee-save area.
+ emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,
+ StackOffset::getScalable(-SVECalleeSavedSize), TII,
+ MachineInstr::FrameDestroy);
+ } else if (BaseForSVEDealloc == AArch64::SP) {
+ if (SVECalleeSavedSize) {
+ // Deallocate the non-SVE locals first before we can deallocate (and
+ // restore callee saves) from the SVE area.
+ emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
+ StackOffset::getFixed(NumBytes), TII,
+ MachineInstr::FrameDestroy, false, NeedsWinCFI,
+ &HasWinCFI, EmitCFI && !HasFP,
+ SVEStackSize + StackOffset::getFixed(
+ NumBytes + PrologueSaveSize));
+ NumBytes = 0;
+ }
+
+ emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
+ DeallocateBefore, TII, MachineInstr::FrameDestroy,
+ false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
+ SVEStackSize +
+ StackOffset::getFixed(NumBytes + PrologueSaveSize));
+
+ emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
+ DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
+ NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
+ DeallocateAfter +
+ StackOffset::getFixed(NumBytes + PrologueSaveSize));
+ }
+
+ if (EmitCFI)
+ emitCalleeSavedSVERestores(RestoreEnd);
+ }
+ } else if (SVEStackSize) {
+ assert(!AFI->isStackRealigned() && !MFI.hasVarSizedObjects() &&
+ "TODO: Support stack realigment / variable-sized objects");
+ // SplitSVEObjects. Determine the sizes and starts/ends of the ZPR and PPR
+ // areas.
+ auto ZPRCalleeSavedSize =
+ StackOffset::getScalable(AFI->getZPRCalleeSavedStackSize());
+ auto PPRCalleeSavedSize =
+ StackOffset::getScalable(AFI->getPPRCalleeSavedStackSize());
+ StackOffset PPRLocalsSize = PPRStackSize - PPRCalleeSavedSize;
+ StackOffset ZPRLocalsSize = ZPRStackSize - ZPRCalleeSavedSize;
+
+ MachineBasicBlock::iterator PPRRestoreBegin = FirstGPRRestoreI,
+ PPRRestoreEnd = FirstGPRRestoreI;
+ if (PPRCalleeSavedSize) {
+ PPRRestoreBegin = std::prev(PPRRestoreEnd);
+ while (PPRRestoreBegin != MBB.begin() &&
+ isPartOfPPRCalleeSaves(std::prev(PPRRestoreBegin)))
+ --PPRRestoreBegin;
+ }
+
+ MachineBasicBlock::iterator ZPRRestoreBegin = PPRRestoreBegin,
+ ZPRRestoreEnd = PPRRestoreBegin;
+ if (ZPRCalleeSavedSize) {
+ ZPRRestoreBegin = std::prev(ZPRRestoreEnd);
+ while (ZPRRestoreBegin != MBB.begin() &&
+ isPartOfZPRCalleeSaves(std::prev(ZPRRestoreBegin)))
+ --ZPRRestoreBegin;
+ }
+
+ auto CFAOffset =
+ SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize);
+ if (PPRCalleeSavedSize || ZPRCalleeSavedSize) {
+ // Deallocate the non-SVE locals first before we can deallocate (and
+ // restore callee saves) from the SVE area.
+ auto NonSVELocals = StackOffset::getFixed(NumBytes);
+ emitFrameOffset(MBB, ZPRRestoreBegin, DL, AArch64::SP, AArch64::SP,
+ NonSVELocals, TII, MachineInstr::FrameDestroy, false,
+ false, nullptr, EmitCFI && !HasFP, CFAOffset);
+ NumBytes = 0;
+ CFAOffset -= NonSVELocals;
}
+
+ if (ZPRLocalsSize) {
+ emitFrameOffset(MBB, ZPRRestoreBegin, DL, AArch64::SP, AArch64::SP,
+ ZPRLocalsSize, TII, MachineInstr::FrameDestroy, false,
+ false, nullptr, EmitCFI && !HasFP, CFAOffset);
+ CFAOffset -= ZPRLocalsSize;
+ }
+
+ if (PPRLocalsSize || ZPRCalleeSavedSize) {
+ assert(PPRRestoreBegin == ZPRRestoreEnd &&
+ "Expected PPR restores after ZPR");
+ emitFrameOffset(MBB, PPRRestoreBegin, DL, AArch64::SP, AArch64::SP,
+ PPRLocalsSize + ZPRCalleeSavedSize, TII,
+ MachineInstr::FrameDestroy, false, false, nullptr,
+ EmitCFI && !HasFP, CFAOffset);
+ CFAOffset -= PPRLocalsSize + ZPRCalleeSavedSize;
+ }
+ if (PPRCalleeSavedSize) {
+ emitFrameOffset(MBB, PPRRestoreEnd, DL, AArch64::SP, AArch64::SP,
+ PPRCalleeSavedSize, TII, MachineInstr::FrameDestroy,
+ false, false, nullptr, EmitCFI && !HasFP, CFAOffset);
+ }
+
+ // We only emit CFI information for ZPRs so emit CFI after the ZPR restores.
if (EmitCFI)
- emitCalleeSavedSVERestores(RestoreEnd);
+ emitCalleeSavedSVERestores(ZPRRestoreEnd);
}
if (!HasFP) {