aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target
diff options
context:
space:
mode:
author: Benjamin Maxwell <benjamin.maxwell@arm.com> 2025-05-08 17:38:27 +0000
committer: Benjamin Maxwell <benjamin.maxwell@arm.com> 2025-09-18 14:19:04 +0000
commit: 0dfb0725e2a4f82af47821946bfbbfcd7ed08e10 (patch)
tree: d842fbc1e317e5a92da06fcf6e9d4dcd272fc048 /llvm/lib/Target
parent: f840ecfde44167467f4ba2e06d19dadd12393378 (diff)
download: llvm-users/MacDue/prepare_split.zip
llvm-users/MacDue/prepare_split.tar.gz
llvm-users/MacDue/prepare_split.tar.bz2
[AArch64] Prepare for split ZPR and PPR area allocation (NFCI) (branch: users/MacDue/prepare_split)
This patch refactors AArch64FrameLowering so that the sizes of the ZPR and PPR areas can be calculated separately. This will be used by a subsequent patch to support allocating ZPRs and PPRs to separate areas. This patch is intended to be NFC (no functional change) and is split out to make the later functional changes easier to spot.
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 220
-rw-r--r-- llvm/lib/Target/AArch64/AArch64FrameLowering.h | 20
-rw-r--r-- llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp | 20
-rw-r--r-- llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h | 63
-rw-r--r-- llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp | 128
-rw-r--r-- llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp | 4
6 files changed, 284 insertions, 171 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 20b0d69..f5f7b65 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -324,6 +324,36 @@ AArch64FrameLowering::getArgumentStackToRestore(MachineFunction &MF,
static bool produceCompactUnwindFrame(const AArch64FrameLowering &,
MachineFunction &MF);
+enum class AssignObjectOffsets { No, Yes };
+/// Process all the SVE stack objects and determine the SVE stack sizes and the
+/// offset of each object. If AssignOffsets is "Yes", the offsets get assigned
+/// (and the SVE stack sizes set). Returns the sizes of the ZPR and PPR areas.
+static SVEStackSizes determineSVEStackSizes(MachineFunction &MF,
+ AssignObjectOffsets AssignOffsets,
+ bool SplitSVEObjects = false);
+
+static unsigned getStackHazardSize(const MachineFunction &MF) {
+ return MF.getSubtarget<AArch64Subtarget>().getStreamingHazardSize();
+}
+
+/// Returns true if PPRs are spilled as ZPRs.
+static bool arePPRsSpilledAsZPR(const MachineFunction &MF) {
+ return MF.getSubtarget().getRegisterInfo()->getSpillSize(
+ AArch64::PPRRegClass) == 16;
+}
+
+StackOffset
+AArch64FrameLowering::getZPRStackSize(const MachineFunction &MF) const {
+ const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ return StackOffset::getScalable(AFI->getStackSizeZPR());
+}
+
+StackOffset
+AArch64FrameLowering::getPPRStackSize(const MachineFunction &MF) const {
+ const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ return StackOffset::getScalable(AFI->getStackSizePPR());
+}
+
// Conservatively, returns true if the function is likely to have SVE vectors
// on the stack. This function is safe to be called before callee-saves or
// object offsets have been determined.
@@ -482,13 +512,6 @@ AArch64FrameLowering::getFixedObjectSize(const MachineFunction &MF,
}
}
-/// Returns the size of the entire SVE stackframe (calleesaves + spills).
-StackOffset
-AArch64FrameLowering::getSVEStackSize(const MachineFunction &MF) const {
- const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- return StackOffset::getScalable((int64_t)AFI->getStackSizeSVE());
-}
-
bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
if (!EnableRedZone)
return false;
@@ -514,7 +537,7 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
!Subtarget.hasSVE();
return !(MFI.hasCalls() || hasFP(MF) || NumBytes > RedZoneSize ||
- getSVEStackSize(MF) || LowerQRegCopyThroughMem);
+ AFI->hasSVEStackSize() || LowerQRegCopyThroughMem);
}
/// hasFPImpl - Return true if the specified function should have a dedicated
@@ -557,7 +580,7 @@ bool AArch64FrameLowering::hasFPImpl(const MachineFunction &MF) const {
// CFA in either of these cases.
if (AFI.needsDwarfUnwindInfo(MF) &&
((requiresSaveVG(MF) || AFI.getSMEFnAttrs().hasStreamingBody()) &&
- (!AFI.hasCalculatedStackSizeSVE() || AFI.getStackSizeSVE() > 0)))
+ (!AFI.hasCalculatedStackSizeSVE() || AFI.hasSVEStackSize())))
return true;
// With large callframes around we may need to use FP to access the scavenging
// emergency spillslot.
@@ -1126,10 +1149,6 @@ static bool isTargetWindows(const MachineFunction &MF) {
return MF.getSubtarget<AArch64Subtarget>().isTargetWindows();
}
-static unsigned getStackHazardSize(const MachineFunction &MF) {
- return MF.getSubtarget<AArch64Subtarget>().getStreamingHazardSize();
-}
-
void AArch64FrameLowering::emitPacRetPlusLeafHardening(
MachineFunction &MF) const {
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
@@ -1212,7 +1231,9 @@ AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
const auto &MFI = MF.getFrameInfo();
int64_t ObjectOffset = MFI.getObjectOffset(FI);
- StackOffset SVEStackSize = getSVEStackSize(MF);
+ StackOffset ZPRStackSize = getZPRStackSize(MF);
+ StackOffset PPRStackSize = getPPRStackSize(MF);
+ StackOffset SVEStackSize = ZPRStackSize + PPRStackSize;
// For VLA-area objects, just emit an offset at the end of the stack frame.
// Whilst not quite correct, these objects do live at the end of the frame and
@@ -1313,7 +1334,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
bool isCSR =
!isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize(MFI));
- const StackOffset &SVEStackSize = getSVEStackSize(MF);
+ const StackOffset SVEStackSize = getSVEStackSize(MF);
// Use frame pointer to reference fixed objects. Use it for locals if
// there are VLAs or a dynamically realigned SP (and thus the SP isn't
@@ -1615,10 +1636,13 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
FirstReg = Count - 1;
}
bool FPAfterSVECalleeSaves = IsWindows && AFI->getSVECalleeSavedStackSize();
- int ScalableByteOffset =
- FPAfterSVECalleeSaves ? 0 : AFI->getSVECalleeSavedStackSize();
+ int ScalableByteOffset = FPAfterSVECalleeSaves
+ ? 0
+ : AFI->getZPRCalleeSavedStackSize() +
+ AFI->getPPRCalleeSavedStackSize();
bool NeedGapToAlignStack = AFI->hasCalleeSaveStackFreeSpace();
Register LastReg = 0;
+ bool HasCSHazardPadding = AFI->hasStackHazardSlotIndex();
// When iterating backwards, the loop condition relies on unsigned wraparound.
for (unsigned i = FirstReg; i < Count; i += RegInc) {
@@ -1648,7 +1672,7 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
}
// Add the stack hazard size as we transition from GPR->FPR CSRs.
- if (AFI->hasStackHazardSlotIndex() &&
+ if (HasCSHazardPadding &&
(!LastReg || !AArch64InstrInfo::isFpOrNEON(LastReg)) &&
AArch64InstrInfo::isFpOrNEON(RPI.Reg1))
ByteOffset += StackFillDir * StackHazardSize;
@@ -1656,7 +1680,7 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
int Scale = TRI->getSpillSize(*RPI.RC);
// Add the next reg to the pair if it is in the same register class.
- if (unsigned(i + RegInc) < Count && !AFI->hasStackHazardSlotIndex()) {
+ if (unsigned(i + RegInc) < Count && !HasCSHazardPadding) {
MCRegister NextReg = CSI[i + RegInc].getReg();
bool IsFirst = i == FirstReg;
switch (RPI.Type) {
@@ -2263,10 +2287,11 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
if (MF.getFunction().getCallingConv() == CallingConv::GHC)
return;
+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+
TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
MF.getSubtarget().getRegisterInfo());
- const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
unsigned UnspilledCSGPR = AArch64::NoRegister;
unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
@@ -2387,15 +2412,19 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// Calculates the callee saved stack size.
unsigned CSStackSize = 0;
- unsigned SVECSStackSize = 0;
+ unsigned ZPRCSStackSize = 0;
+ unsigned PPRCSStackSize = 0;
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
for (unsigned Reg : SavedRegs.set_bits()) {
auto *RC = TRI->getMinimalPhysRegClass(Reg);
assert(RC && "expected register class!");
auto SpillSize = TRI->getSpillSize(*RC);
- if (AArch64::PPRRegClass.contains(Reg) ||
- AArch64::ZPRRegClass.contains(Reg))
- SVECSStackSize += SpillSize;
+ bool IsZPR = AArch64::ZPRRegClass.contains(Reg);
+ bool IsPPR = !IsZPR && AArch64::PPRRegClass.contains(Reg);
+ if (IsZPR || (IsPPR && arePPRsSpilledAsZPR(MF)))
+ ZPRCSStackSize += SpillSize;
+ else if (IsPPR)
+ PPRCSStackSize += SpillSize;
else
CSStackSize += SpillSize;
}
@@ -2405,17 +2434,17 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// only 64-bit GPRs can be added to SavedRegs.
unsigned NumSavedRegs = SavedRegs.count();
- // Increase the callee-saved stack size if the function has streaming mode
- // changes, as we will need to spill the value of the VG register.
- if (requiresSaveVG(MF))
- CSStackSize += 8;
-
// Determine if a Hazard slot should be used, and increase the CSStackSize by
// StackHazardSize if so.
determineStackHazardSlot(MF, SavedRegs);
if (AFI->hasStackHazardSlotIndex())
CSStackSize += getStackHazardSize(MF);
+ // Increase the callee-saved stack size if the function has streaming mode
+ // changes, as we will need to spill the value of the VG register.
+ if (requiresSaveVG(MF))
+ CSStackSize += 8;
+
// If we must call __arm_get_current_vg in the prologue preserve the LR.
if (requiresSaveVG(MF) && !Subtarget.hasSVE())
SavedRegs.set(AArch64::LR);
@@ -2436,8 +2465,11 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
});
// If any callee-saved registers are used, the frame cannot be eliminated.
- int64_t SVEStackSize =
- alignTo(SVECSStackSize + estimateSVEStackObjectOffsets(MFI), 16);
+ auto [ZPRLocalStackSize, PPRLocalStackSize] =
+ determineSVEStackSizes(MF, AssignObjectOffsets::No);
+ uint64_t SVELocals = ZPRLocalStackSize + PPRLocalStackSize;
+ uint64_t SVEStackSize =
+ alignTo(ZPRCSStackSize + PPRCSStackSize + SVELocals, 16);
bool CanEliminateFrame = (SavedRegs.count() == 0) && !SVEStackSize;
// The CSR spill slots have not been allocated yet, so estimateStackSize
@@ -2522,7 +2554,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// instructions.
AFI->setCalleeSavedStackSize(AlignedCSStackSize);
AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize);
- AFI->setSVECalleeSavedStackSize(alignTo(SVECSStackSize, 16));
+ AFI->setSVECalleeSavedStackSize(ZPRCSStackSize, alignTo(PPRCSStackSize, 16));
}
bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
@@ -2661,7 +2693,6 @@ static bool getSVECalleeSaveSlotRange(const MachineFrameInfo &MFI,
assert((Max == std::numeric_limits<int>::min() ||
Max + 1 == CS.getFrameIdx()) &&
"SVE CalleeSaves are not consecutive");
-
Min = std::min(Min, CS.getFrameIdx());
Max = std::max(Max, CS.getFrameIdx());
}
@@ -2669,15 +2700,21 @@ static bool getSVECalleeSaveSlotRange(const MachineFrameInfo &MFI,
return Min != std::numeric_limits<int>::max();
}
-// Process all the SVE stack objects and determine offsets for each
-// object. If AssignOffsets is true, the offsets get assigned.
-// Fills in the first and last callee-saved frame indices into
-// Min/MaxCSFrameIndex, respectively.
-// Returns the size of the stack.
-static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
- int &MinCSFrameIndex,
- int &MaxCSFrameIndex,
- bool AssignOffsets) {
+static SVEStackSizes determineSVEStackSizes(MachineFunction &MF,
+ AssignObjectOffsets AssignOffsets,
+ bool SplitSVEObjects) {
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ auto *AFI = MF.getInfo<AArch64FunctionInfo>();
+
+ SVEStackSizes SVEStack{};
+
+ // With SplitSVEObjects we maintain separate stack offsets for predicates
+ // (PPRs) and SVE vectors (ZPRs). When SplitSVEObjects is disabled predicates
+ // are included in the SVE vector area.
+ uint64_t &ZPRStackTop = SVEStack.ZPRStackSize;
+ uint64_t &PPRStackTop =
+ SplitSVEObjects ? SVEStack.PPRStackSize : SVEStack.ZPRStackSize;
+
#ifndef NDEBUG
// First process all fixed stack objects.
for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
@@ -2686,26 +2723,42 @@ static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
"reference.");
#endif
- auto Assign = [&MFI](int FI, int64_t Offset) {
+ auto AllocateObject = [&](int FI) {
+ uint64_t &StackTop = MFI.getStackID(FI) == TargetStackID::ScalableVector
+ ? ZPRStackTop
+ : PPRStackTop;
+
+ // FIXME: Given that the length of SVE vectors is not necessarily a power of
+ // two, we'd need to align every object dynamically at runtime if the
+ // alignment is larger than 16. This is not yet supported.
+ Align Alignment = MFI.getObjectAlign(FI);
+ if (Alignment > Align(16))
+ report_fatal_error(
+ "Alignment of scalable vectors > 16 bytes is not yet supported");
+
+ StackTop += MFI.getObjectSize(FI);
+ StackTop = alignTo(StackTop, Alignment);
+
+ assert(StackTop < std::numeric_limits<int64_t>::max() &&
+ "SVE StackTop far too large?!");
+
+ int64_t Offset = -int64_t(StackTop);
+ if (AssignOffsets == AssignObjectOffsets::Yes)
+ MFI.setObjectOffset(FI, Offset);
+
LLVM_DEBUG(dbgs() << "alloc FI(" << FI << ") at SP[" << Offset << "]\n");
- MFI.setObjectOffset(FI, Offset);
};
- int64_t Offset = 0;
-
// Then process all callee saved slots.
+ int MinCSFrameIndex, MaxCSFrameIndex;
if (getSVECalleeSaveSlotRange(MFI, MinCSFrameIndex, MaxCSFrameIndex)) {
- // Assign offsets to the callee save slots.
- for (int I = MinCSFrameIndex; I <= MaxCSFrameIndex; ++I) {
- Offset += MFI.getObjectSize(I);
- Offset = alignTo(Offset, MFI.getObjectAlign(I));
- if (AssignOffsets)
- Assign(I, -Offset);
- }
+ for (int FI = MinCSFrameIndex; FI <= MaxCSFrameIndex; ++FI)
+ AllocateObject(FI);
}
- // Ensure that the Callee-save area is aligned to 16bytes.
- Offset = alignTo(Offset, Align(16U));
+ // Ensure the CS area is 16-byte aligned.
+ PPRStackTop = alignTo(PPRStackTop, Align(16U));
+ ZPRStackTop = alignTo(ZPRStackTop, Align(16U));
// Create a buffer of SVE objects to allocate and sort it.
SmallVector<int, 8> ObjectsToAllocate;
@@ -2715,50 +2768,34 @@ static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
int StackProtectorFI = -1;
if (MFI.hasStackProtectorIndex()) {
StackProtectorFI = MFI.getStackProtectorIndex();
- if (MFI.isScalableStackID(StackProtectorFI))
+ if (MFI.getStackID(StackProtectorFI) == TargetStackID::ScalableVector)
ObjectsToAllocate.push_back(StackProtectorFI);
}
- for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) {
- if (!MFI.isScalableStackID(I))
- continue;
- if (I == StackProtectorFI)
+
+ for (int FI = 0, E = MFI.getObjectIndexEnd(); FI != E; ++FI) {
+ if (FI == StackProtectorFI || MFI.isDeadObjectIndex(FI))
continue;
- if (MaxCSFrameIndex >= I && I >= MinCSFrameIndex)
+ if (MaxCSFrameIndex >= FI && FI >= MinCSFrameIndex)
continue;
- if (MFI.isDeadObjectIndex(I))
+
+ if (MFI.getStackID(FI) != TargetStackID::ScalableVector &&
+ MFI.getStackID(FI) != TargetStackID::ScalablePredicateVector)
continue;
- ObjectsToAllocate.push_back(I);
+ ObjectsToAllocate.push_back(FI);
}
// Allocate all SVE locals and spills
- for (unsigned FI : ObjectsToAllocate) {
- Align Alignment = MFI.getObjectAlign(FI);
- // FIXME: Given that the length of SVE vectors is not necessarily a power of
- // two, we'd need to align every object dynamically at runtime if the
- // alignment is larger than 16. This is not yet supported.
- if (Alignment > Align(16))
- report_fatal_error(
- "Alignment of scalable vectors > 16 bytes is not yet supported");
+ for (unsigned FI : ObjectsToAllocate)
+ AllocateObject(FI);
- Offset = alignTo(Offset + MFI.getObjectSize(FI), Alignment);
- if (AssignOffsets)
- Assign(FI, -Offset);
- }
+ PPRStackTop = alignTo(PPRStackTop, Align(16U));
+ ZPRStackTop = alignTo(ZPRStackTop, Align(16U));
- return Offset;
-}
+ if (AssignOffsets == AssignObjectOffsets::Yes)
+ AFI->setStackSizeSVE(SVEStack.ZPRStackSize, SVEStack.PPRStackSize);
-int64_t AArch64FrameLowering::estimateSVEStackObjectOffsets(
- MachineFrameInfo &MFI) const {
- int MinCSFrameIndex, MaxCSFrameIndex;
- return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex, false);
-}
-
-int64_t AArch64FrameLowering::assignSVEStackObjectOffsets(
- MachineFrameInfo &MFI, int &MinCSFrameIndex, int &MaxCSFrameIndex) const {
- return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex,
- true);
+ return SVEStack;
}
/// Attempts to scavenge a register from \p ScavengeableRegs given the used
@@ -3072,12 +3109,7 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown &&
"Upwards growing stack unsupported");
- int MinCSFrameIndex, MaxCSFrameIndex;
- int64_t SVEStackSize =
- assignSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex);
-
- AFI->setStackSizeSVE(alignTo(SVEStackSize, 16U));
- AFI->setMinMaxSVECSFrameIndex(MinCSFrameIndex, MaxCSFrameIndex);
+ (void)determineSVEStackSizes(MF, AssignObjectOffsets::Yes);
// If this function isn't doing Win64-style C++ EH, we don't need to do
// anything.
@@ -3599,7 +3631,7 @@ StackOffset AArch64FrameLowering::getFrameIndexReferencePreferSP(
// Go to common code if we cannot provide sp + offset.
if (MFI.hasVarSizedObjects() ||
- MF.getInfo<AArch64FunctionInfo>()->getStackSizeSVE() ||
+ MF.getInfo<AArch64FunctionInfo>()->hasSVEStackSize() ||
MF.getSubtarget().getRegisterInfo()->hasStackRealignment(MF))
return getFrameIndexReference(MF, FI, FrameReg);
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index 20d1d6a..38aa28b1 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -24,6 +24,11 @@ class AArch64FunctionInfo;
class AArch64PrologueEmitter;
class AArch64EpilogueEmitter;
+struct SVEStackSizes {
+ uint64_t ZPRStackSize{0};
+ uint64_t PPRStackSize{0};
+};
+
class AArch64FrameLowering : public TargetFrameLowering {
public:
explicit AArch64FrameLowering()
@@ -147,7 +152,16 @@ public:
bool requiresSaveVG(const MachineFunction &MF) const;
- StackOffset getSVEStackSize(const MachineFunction &MF) const;
+ /// Returns the size of the entire ZPR stackframe (calleesaves + spills).
+ StackOffset getZPRStackSize(const MachineFunction &MF) const;
+
+ /// Returns the size of the entire PPR stackframe (calleesaves + spills).
+ StackOffset getPPRStackSize(const MachineFunction &MF) const;
+
+ /// Returns the size of the entire SVE stackframe (PPRs + ZPRs).
+ StackOffset getSVEStackSize(const MachineFunction &MF) const {
+ return getZPRStackSize(MF) + getPPRStackSize(MF);
+ }
friend class AArch64PrologueEpilogueCommon;
friend class AArch64PrologueEmitter;
@@ -167,10 +181,6 @@ private:
/// Returns true if CSRs should be paired.
bool producePairRegisters(MachineFunction &MF) const;
- int64_t estimateSVEStackObjectOffsets(MachineFrameInfo &MF) const;
- int64_t assignSVEStackObjectOffsets(MachineFrameInfo &MF,
- int &MinCSFrameIndex,
- int &MaxCSFrameIndex) const;
/// Make a determination whether a Hazard slot is used and create it if
/// needed.
void determineStackHazardSlot(MachineFunction &MF,
diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
index a81f5b3..b3c9656 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
@@ -23,12 +23,21 @@
using namespace llvm;
+static std::optional<uint64_t>
+getSVEStackSize(const AArch64FunctionInfo &MFI,
+ uint64_t (AArch64FunctionInfo::*GetStackSize)() const) {
+ if (!MFI.hasCalculatedStackSizeSVE())
+ return std::nullopt;
+ return (MFI.*GetStackSize)();
+}
+
yaml::AArch64FunctionInfo::AArch64FunctionInfo(
const llvm::AArch64FunctionInfo &MFI)
: HasRedZone(MFI.hasRedZone()),
- StackSizeSVE(MFI.hasCalculatedStackSizeSVE()
- ? std::optional<uint64_t>(MFI.getStackSizeSVE())
- : std::nullopt),
+ StackSizeZPR(
+ getSVEStackSize(MFI, &llvm::AArch64FunctionInfo::getStackSizeZPR)),
+ StackSizePPR(
+ getSVEStackSize(MFI, &llvm::AArch64FunctionInfo::getStackSizePPR)),
HasStackFrame(MFI.hasStackFrame()
? std::optional<bool>(MFI.hasStackFrame())
: std::nullopt) {}
@@ -41,8 +50,9 @@ void AArch64FunctionInfo::initializeBaseYamlFields(
const yaml::AArch64FunctionInfo &YamlMFI) {
if (YamlMFI.HasRedZone)
HasRedZone = YamlMFI.HasRedZone;
- if (YamlMFI.StackSizeSVE)
- setStackSizeSVE(*YamlMFI.StackSizeSVE);
+ if (YamlMFI.StackSizeZPR || YamlMFI.StackSizePPR)
+ setStackSizeSVE(YamlMFI.StackSizeZPR.value_or(0),
+ YamlMFI.StackSizePPR.value_or(0));
if (YamlMFI.HasStackFrame)
setHasStackFrame(*YamlMFI.HasStackFrame);
}
diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index 897c7e8..4a79d9c 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -74,13 +74,10 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
/// Amount of stack frame size, not including callee-saved registers.
uint64_t LocalStackSize = 0;
- /// The start and end frame indices for the SVE callee saves.
- int MinSVECSFrameIndex = 0;
- int MaxSVECSFrameIndex = 0;
-
/// Amount of stack frame size used for saving callee-saved registers.
unsigned CalleeSavedStackSize = 0;
- unsigned SVECalleeSavedStackSize = 0;
+ unsigned ZPRCalleeSavedStackSize = 0;
+ unsigned PPRCalleeSavedStackSize = 0;
bool HasCalleeSavedStackSize = false;
bool HasSVECalleeSavedStackSize = false;
@@ -137,9 +134,10 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
/// SVE stack size (for predicates and data vectors) are maintained here
/// rather than in FrameInfo, as the placement and Stack IDs are target
/// specific.
- uint64_t StackSizeSVE = 0;
+ uint64_t StackSizeZPR = 0;
+ uint64_t StackSizePPR = 0;
- /// HasCalculatedStackSizeSVE indicates whether StackSizeSVE is valid.
+ /// HasCalculatedStackSizeSVE indicates whether StackSizeZPR/PPR are valid.
bool HasCalculatedStackSizeSVE = false;
/// Has a value when it is known whether or not the function uses a
@@ -312,16 +310,25 @@ public:
TailCallReservedStack = bytes;
}
- bool hasCalculatedStackSizeSVE() const { return HasCalculatedStackSizeSVE; }
-
- void setStackSizeSVE(uint64_t S) {
+ void setStackSizeSVE(uint64_t ZPR, uint64_t PPR) {
+ StackSizeZPR = ZPR;
+ StackSizePPR = PPR;
HasCalculatedStackSizeSVE = true;
- StackSizeSVE = S;
}
- uint64_t getStackSizeSVE() const {
+ uint64_t getStackSizeZPR() const {
assert(hasCalculatedStackSizeSVE());
- return StackSizeSVE;
+ return StackSizeZPR;
+ }
+ uint64_t getStackSizePPR() const {
+ assert(hasCalculatedStackSizeSVE());
+ return StackSizePPR;
+ }
+
+ bool hasCalculatedStackSizeSVE() const { return HasCalculatedStackSizeSVE; }
+
+ bool hasSVEStackSize() const {
+ return getStackSizeZPR() > 0 || getStackSizePPR() > 0;
}
bool hasStackFrame() const { return HasStackFrame; }
@@ -414,23 +421,25 @@ public:
}
// Saves the CalleeSavedStackSize for SVE vectors in 'scalable bytes'
- void setSVECalleeSavedStackSize(unsigned Size) {
- SVECalleeSavedStackSize = Size;
+ void setSVECalleeSavedStackSize(unsigned ZPR, unsigned PPR) {
+ ZPRCalleeSavedStackSize = ZPR;
+ PPRCalleeSavedStackSize = PPR;
HasSVECalleeSavedStackSize = true;
}
- unsigned getSVECalleeSavedStackSize() const {
+ unsigned getZPRCalleeSavedStackSize() const {
assert(HasSVECalleeSavedStackSize &&
- "SVECalleeSavedStackSize has not been calculated");
- return SVECalleeSavedStackSize;
+ "ZPRCalleeSavedStackSize has not been calculated");
+ return ZPRCalleeSavedStackSize;
}
-
- void setMinMaxSVECSFrameIndex(int Min, int Max) {
- MinSVECSFrameIndex = Min;
- MaxSVECSFrameIndex = Max;
+ unsigned getPPRCalleeSavedStackSize() const {
+ assert(HasSVECalleeSavedStackSize &&
+ "PPRCalleeSavedStackSize has not been calculated");
+ return PPRCalleeSavedStackSize;
}
- int getMinSVECSFrameIndex() const { return MinSVECSFrameIndex; }
- int getMaxSVECSFrameIndex() const { return MaxSVECSFrameIndex; }
+ unsigned getSVECalleeSavedStackSize() const {
+ return getZPRCalleeSavedStackSize() + getPPRCalleeSavedStackSize();
+ }
void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamicTLSAccesses; }
unsigned getNumLocalDynamicTLSAccesses() const {
@@ -611,7 +620,8 @@ private:
namespace yaml {
struct AArch64FunctionInfo final : public yaml::MachineFunctionInfo {
std::optional<bool> HasRedZone;
- std::optional<uint64_t> StackSizeSVE;
+ std::optional<uint64_t> StackSizeZPR;
+ std::optional<uint64_t> StackSizePPR;
std::optional<bool> HasStackFrame;
AArch64FunctionInfo() = default;
@@ -624,7 +634,8 @@ struct AArch64FunctionInfo final : public yaml::MachineFunctionInfo {
template <> struct MappingTraits<AArch64FunctionInfo> {
static void mapping(IO &YamlIO, AArch64FunctionInfo &MFI) {
YamlIO.mapOptional("hasRedZone", MFI.HasRedZone);
- YamlIO.mapOptional("stackSizeSVE", MFI.StackSizeSVE);
+ YamlIO.mapOptional("stackSizeZPR", MFI.StackSizeZPR);
+ YamlIO.mapOptional("stackSizePPR", MFI.StackSizePPR);
YamlIO.mapOptional("hasStackFrame", MFI.HasStackFrame);
}
};
diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
index 78777b5..3de0d4d 100644
--- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
@@ -48,21 +48,19 @@ bool AArch64PrologueEpilogueCommon::isVGInstruction(
return Opc == TargetOpcode::COPY;
}
-// Convenience function to determine whether I is an SVE callee save.
-static bool isSVECalleeSave(MachineBasicBlock::iterator I) {
+// Convenience function to determine whether I is part of the ZPR callee saves.
+static bool isPartOfZPRCalleeSaves(MachineBasicBlock::iterator I) {
switch (I->getOpcode()) {
default:
return false;
- case AArch64::PTRUE_C_B:
case AArch64::LD1B_2Z_IMM:
case AArch64::ST1B_2Z_IMM:
case AArch64::STR_ZXI:
- case AArch64::STR_PXI:
case AArch64::LDR_ZXI:
- case AArch64::LDR_PXI:
- case AArch64::PTRUE_B:
case AArch64::CPY_ZPzI_B:
case AArch64::CMPNE_PPzZI_B:
+ case AArch64::PTRUE_C_B:
+ case AArch64::PTRUE_B:
return I->getFlag(MachineInstr::FrameSetup) ||
I->getFlag(MachineInstr::FrameDestroy);
case AArch64::SEH_SavePReg:
@@ -71,6 +69,23 @@ static bool isSVECalleeSave(MachineBasicBlock::iterator I) {
}
}
+// Convenience function to determine whether I is part of the PPR callee saves.
+static bool isPartOfPPRCalleeSaves(MachineBasicBlock::iterator I) {
+ switch (I->getOpcode()) {
+ default:
+ return false;
+ case AArch64::STR_PXI:
+ case AArch64::LDR_PXI:
+ return I->getFlag(MachineInstr::FrameSetup) ||
+ I->getFlag(MachineInstr::FrameDestroy);
+ }
+}
+
+// Convenience function to determine whether I is part of the SVE callee saves.
+static bool isPartOfSVECalleeSaves(MachineBasicBlock::iterator I) {
+ return isPartOfZPRCalleeSaves(I) || isPartOfPPRCalleeSaves(I);
+}
+
AArch64PrologueEpilogueCommon::AArch64PrologueEpilogueCommon(
MachineFunction &MF, MachineBasicBlock &MBB,
const AArch64FrameLowering &AFL)
@@ -316,7 +331,7 @@ bool AArch64PrologueEpilogueCommon::shouldCombineCSRLocalStackBump(
// When there is an SVE area on the stack, always allocate the
// callee-saves and spills/locals separately.
- if (AFL.getSVEStackSize(MF))
+ if (AFI->hasSVEStackSize())
return false;
return true;
@@ -632,7 +647,7 @@ void AArch64PrologueEmitter::emitPrologue() {
// Now allocate space for the GPR callee saves.
MachineBasicBlock::iterator MBBI = PrologueBeginI;
- while (MBBI != EndI && isSVECalleeSave(MBBI))
+ while (MBBI != EndI && isPartOfSVECalleeSaves(MBBI))
++MBBI;
FirstGPRSaveI = convertCalleeSaveRestoreToSPPrePostIncDec(
MBBI, DL, -AFI->getCalleeSavedStackSize(), EmitAsyncCFI);
@@ -662,7 +677,7 @@ void AArch64PrologueEmitter::emitPrologue() {
MachineBasicBlock::iterator AfterGPRSavesI = FirstGPRSaveI;
while (AfterGPRSavesI != EndI &&
AfterGPRSavesI->getFlag(MachineInstr::FrameSetup) &&
- !isSVECalleeSave(AfterGPRSavesI)) {
+ !isPartOfSVECalleeSaves(AfterGPRSavesI)) {
if (CombineSPBump &&
// Only fix-up frame-setup load/store instructions.
(!AFL.requiresSaveVG(MF) || !isVGInstruction(AfterGPRSavesI, TLI)))
@@ -693,38 +708,66 @@ void AArch64PrologueEmitter::emitPrologue() {
if (AFL.windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding))
emitWindowsStackProbe(AfterGPRSavesI, DL, NumBytes, RealignmentPadding);
- StackOffset SVEStackSize = AFL.getSVEStackSize(MF);
- StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize;
MachineBasicBlock::iterator CalleeSavesEnd = AfterGPRSavesI;
+ StackOffset PPRCalleeSavesSize =
+ StackOffset::getScalable(AFI->getPPRCalleeSavedStackSize());
+ StackOffset ZPRCalleeSavesSize =
+ StackOffset::getScalable(AFI->getZPRCalleeSavedStackSize());
+ StackOffset SVECalleeSavesSize = PPRCalleeSavesSize + ZPRCalleeSavesSize;
+ StackOffset PPRLocalsSize = AFL.getPPRStackSize(MF) - PPRCalleeSavesSize;
+ StackOffset ZPRLocalsSize = AFL.getZPRStackSize(MF) - ZPRCalleeSavesSize;
+
StackOffset CFAOffset =
StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes);
-
- // Process the SVE callee-saves to determine what space needs to be
- // allocated.
MachineBasicBlock::iterator AfterSVESavesI = AfterGPRSavesI;
- if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
- LLVM_DEBUG(dbgs() << "SVECalleeSavedStackSize = " << CalleeSavedSize
- << "\n");
- SVECalleeSavesSize = StackOffset::getScalable(CalleeSavedSize);
- SVELocalsSize = SVEStackSize - SVECalleeSavesSize;
- // Find callee save instructions in frame.
- // Note: With FPAfterSVECalleeSaves the callee saves have already been
+
+ if (!FPAfterSVECalleeSaves) {
+ MachineBasicBlock::iterator ZPRCalleeSavesBegin = AfterGPRSavesI,
+ ZPRCalleeSavesEnd = AfterGPRSavesI;
+ MachineBasicBlock::iterator PPRCalleeSavesBegin = AfterGPRSavesI,
+ PPRCalleeSavesEnd = AfterGPRSavesI;
+
+ // Process the SVE callee-saves to determine what space needs to be
// allocated.
- if (!FPAfterSVECalleeSaves) {
- MachineBasicBlock::iterator CalleeSavesBegin = AfterGPRSavesI;
- assert(isSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction");
- while (isSVECalleeSave(AfterSVESavesI) &&
+
+ if (PPRCalleeSavesSize) {
+ LLVM_DEBUG(dbgs() << "PPRCalleeSavedStackSize = "
+ << PPRCalleeSavesSize.getScalable() << "\n");
+
+ PPRCalleeSavesBegin = AfterSVESavesI;
+ assert(isPartOfPPRCalleeSaves(PPRCalleeSavesBegin) &&
+ "Unexpected instruction");
+ while (isPartOfPPRCalleeSaves(AfterSVESavesI) &&
AfterSVESavesI != MBB.getFirstTerminator())
++AfterSVESavesI;
- CalleeSavesEnd = AfterSVESavesI;
+ PPRCalleeSavesEnd = AfterSVESavesI;
+ }
- StackOffset LocalsSize = SVELocalsSize + StackOffset::getFixed(NumBytes);
- // Allocate space for the callee saves (if any).
- allocateStackSpace(CalleeSavesBegin, 0, SVECalleeSavesSize,
- EmitAsyncCFI && !HasFP, CFAOffset,
- MFI.hasVarSizedObjects() || LocalsSize);
+ if (ZPRCalleeSavesSize) {
+ LLVM_DEBUG(dbgs() << "ZPRCalleeSavedStackSize = "
+ << ZPRCalleeSavesSize.getScalable() << "\n");
+ ZPRCalleeSavesBegin = AfterSVESavesI;
+ assert(isPartOfZPRCalleeSaves(ZPRCalleeSavesBegin) &&
+ "Unexpected instruction");
+ while (isPartOfZPRCalleeSaves(AfterSVESavesI) &&
+ AfterSVESavesI != MBB.getFirstTerminator())
+ ++AfterSVESavesI;
+ ZPRCalleeSavesEnd = AfterSVESavesI;
}
+
+ // Allocate space for the callee saves (if any).
+ StackOffset LocalsSize =
+ PPRLocalsSize + ZPRLocalsSize + StackOffset::getFixed(NumBytes);
+ MachineBasicBlock::iterator CalleeSavesBegin =
+ AFI->getPPRCalleeSavedStackSize() ? PPRCalleeSavesBegin
+ : ZPRCalleeSavesBegin;
+ allocateStackSpace(CalleeSavesBegin, 0, SVECalleeSavesSize,
+ EmitAsyncCFI && !HasFP, CFAOffset,
+ MFI.hasVarSizedObjects() || LocalsSize);
+
+ CalleeSavesEnd = AFI->getZPRCalleeSavedStackSize() ? ZPRCalleeSavesEnd
+ : PPRCalleeSavesEnd;
}
CFAOffset += SVECalleeSavesSize;
@@ -739,6 +782,7 @@ void AArch64PrologueEmitter::emitPrologue() {
// FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
// the correct value here, as NumBytes also includes padding bytes,
// which shouldn't be counted here.
+ StackOffset SVELocalsSize = PPRLocalsSize + ZPRLocalsSize;
allocateStackSpace(CalleeSavesEnd, RealignmentPadding,
SVELocalsSize + StackOffset::getFixed(NumBytes),
EmitAsyncCFI && !HasFP, CFAOffset,
@@ -789,7 +833,8 @@ void AArch64PrologueEmitter::emitPrologue() {
emitDefineCFAWithFP(AfterSVESavesI, FixedObject);
} else {
StackOffset TotalSize =
- SVEStackSize + StackOffset::getFixed((int64_t)MFI.getStackSize());
+ AFL.getSVEStackSize(MF) +
+ StackOffset::getFixed((int64_t)MFI.getStackSize());
CFIInstBuilder CFIBuilder(MBB, AfterSVESavesI, MachineInstr::FrameSetup);
CFIBuilder.insertCFIInst(
createDefCFA(RegInfo, /*FrameReg=*/AArch64::SP, /*Reg=*/AArch64::SP,
@@ -1315,7 +1360,7 @@ void AArch64EpilogueEmitter::emitEpilogue() {
while (FirstGPRRestoreI != Begin) {
--FirstGPRRestoreI;
if (!FirstGPRRestoreI->getFlag(MachineInstr::FrameDestroy) ||
- (!FPAfterSVECalleeSaves && isSVECalleeSave(FirstGPRRestoreI))) {
+ (!FPAfterSVECalleeSaves && isPartOfSVECalleeSaves(FirstGPRRestoreI))) {
++FirstGPRRestoreI;
break;
} else if (CombineSPBump)
@@ -1339,7 +1384,7 @@ void AArch64EpilogueEmitter::emitEpilogue() {
if (HasFP && AFI->hasSwiftAsyncContext())
emitSwiftAsyncContextFramePointer(EpilogueEndI, DL);
- const StackOffset &SVEStackSize = AFL.getSVEStackSize(MF);
+ StackOffset SVEStackSize = AFL.getSVEStackSize(MF);
// If there is a single SP update, insert it before the ret and we're done.
if (CombineSPBump) {
@@ -1365,20 +1410,25 @@ void AArch64EpilogueEmitter::emitEpilogue() {
StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
MachineBasicBlock::iterator RestoreBegin = FirstGPRRestoreI,
RestoreEnd = FirstGPRRestoreI;
- if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
+ int64_t ZPRCalleeSavedSize = AFI->getZPRCalleeSavedStackSize();
+ int64_t PPRCalleeSavedSize = AFI->getPPRCalleeSavedStackSize();
+ int64_t SVECalleeSavedSize = ZPRCalleeSavedSize + PPRCalleeSavedSize;
+
+ if (SVECalleeSavedSize) {
if (FPAfterSVECalleeSaves)
RestoreEnd = MBB.getFirstTerminator();
RestoreBegin = std::prev(RestoreEnd);
while (RestoreBegin != MBB.begin() &&
- isSVECalleeSave(std::prev(RestoreBegin)))
+ isPartOfSVECalleeSaves(std::prev(RestoreBegin)))
--RestoreBegin;
- assert(isSVECalleeSave(RestoreBegin) &&
- isSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction");
+ assert(isPartOfSVECalleeSaves(RestoreBegin) &&
+ isPartOfSVECalleeSaves(std::prev(RestoreEnd)) &&
+ "Unexpected instruction");
StackOffset CalleeSavedSizeAsOffset =
- StackOffset::getScalable(CalleeSavedSize);
+ StackOffset::getScalable(SVECalleeSavedSize);
DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
DeallocateAfter = CalleeSavedSizeAsOffset;
}
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 2b0c8ad..3f43b70 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -643,7 +643,7 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
if (ST.hasSVE() || ST.isStreaming()) {
// Frames that have variable sized objects and scalable SVE objects,
// should always use a basepointer.
- if (!AFI->hasCalculatedStackSizeSVE() || AFI->getStackSizeSVE())
+ if (!AFI->hasCalculatedStackSizeSVE() || AFI->hasSVEStackSize())
return true;
}
@@ -783,7 +783,7 @@ AArch64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
assert((!MF.getSubtarget<AArch64Subtarget>().hasSVE() ||
AFI->hasCalculatedStackSizeSVE()) &&
"Expected SVE area to be calculated by this point");
- return TFI.hasFP(MF) && !hasStackRealignment(MF) && !AFI->getStackSizeSVE() &&
+ return TFI.hasFP(MF) && !hasStackRealignment(MF) && !AFI->hasSVEStackSize() &&
!AFI->hasStackHazardSlotIndex();
}