diff options
author | Benjamin Maxwell <benjamin.maxwell@arm.com> | 2025-05-08 17:38:27 +0000 |
---|---|---|
committer | Benjamin Maxwell <benjamin.maxwell@arm.com> | 2025-09-18 14:19:04 +0000 |
commit | 0dfb0725e2a4f82af47821946bfbbfcd7ed08e10 (patch) | |
tree | d842fbc1e317e5a92da06fcf6e9d4dcd272fc048 /llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | |
parent | f840ecfde44167467f4ba2e06d19dadd12393378 (diff) | |
download | llvm-users/MacDue/prepare_split.zip llvm-users/MacDue/prepare_split.tar.gz llvm-users/MacDue/prepare_split.tar.bz2 |
[AArch64] Prepare for split ZPR and PPR area allocation (NFCI)users/MacDue/prepare_split
This patch attempts to refactor AArch64FrameLowering to allow the size
of the ZPR and PPR areas to be calculated separately. This will be used
by a subsequent patch to support allocating ZPRs and PPRs to separate
areas. This patch should be an NFC and is split out to make later
functional changes easier to spot.
Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64FrameLowering.cpp')
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 220 |
1 files changed, 126 insertions, 94 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 20b0d69..f5f7b65 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -324,6 +324,36 @@ AArch64FrameLowering::getArgumentStackToRestore(MachineFunction &MF, static bool produceCompactUnwindFrame(const AArch64FrameLowering &, MachineFunction &MF); +enum class AssignObjectOffsets { No, Yes }; +/// Process all the SVE stack objects and the SVE stack size and offsets for +/// each object. If AssignOffsets is "Yes", the offsets get assigned (and SVE +/// stack sizes set). Returns the size of the SVE stack. +static SVEStackSizes determineSVEStackSizes(MachineFunction &MF, + AssignObjectOffsets AssignOffsets, + bool SplitSVEObjects = false); + +static unsigned getStackHazardSize(const MachineFunction &MF) { + return MF.getSubtarget<AArch64Subtarget>().getStreamingHazardSize(); +} + +/// Returns true if PPRs are spilled as ZPRs. +static bool arePPRsSpilledAsZPR(const MachineFunction &MF) { + return MF.getSubtarget().getRegisterInfo()->getSpillSize( + AArch64::PPRRegClass) == 16; +} + +StackOffset +AArch64FrameLowering::getZPRStackSize(const MachineFunction &MF) const { + const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); + return StackOffset::getScalable(AFI->getStackSizeZPR()); +} + +StackOffset +AArch64FrameLowering::getPPRStackSize(const MachineFunction &MF) const { + const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); + return StackOffset::getScalable(AFI->getStackSizePPR()); +} + // Conservatively, returns true if the function is likely to have SVE vectors // on the stack. This function is safe to be called before callee-saves or // object offsets have been determined. @@ -482,13 +512,6 @@ AArch64FrameLowering::getFixedObjectSize(const MachineFunction &MF, } } -/// Returns the size of the entire SVE stackframe (calleesaves + spills). -StackOffset -AArch64FrameLowering::getSVEStackSize(const MachineFunction &MF) const { - const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); - return StackOffset::getScalable((int64_t)AFI->getStackSizeSVE()); -} - bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { if (!EnableRedZone) return false; @@ -514,7 +537,7 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { !Subtarget.hasSVE(); return !(MFI.hasCalls() || hasFP(MF) || NumBytes > RedZoneSize || - getSVEStackSize(MF) || LowerQRegCopyThroughMem); + AFI->hasSVEStackSize() || LowerQRegCopyThroughMem); } /// hasFPImpl - Return true if the specified function should have a dedicated @@ -557,7 +580,7 @@ bool AArch64FrameLowering::hasFPImpl(const MachineFunction &MF) const { // CFA in either of these cases. if (AFI.needsDwarfUnwindInfo(MF) && ((requiresSaveVG(MF) || AFI.getSMEFnAttrs().hasStreamingBody()) && - (!AFI.hasCalculatedStackSizeSVE() || AFI.getStackSizeSVE() > 0))) + (!AFI.hasCalculatedStackSizeSVE() || AFI.hasSVEStackSize()))) return true; // With large callframes around we may need to use FP to access the scavenging // emergency spillslot. @@ -1126,10 +1149,6 @@ static bool isTargetWindows(const MachineFunction &MF) { return MF.getSubtarget<AArch64Subtarget>().isTargetWindows(); } -static unsigned getStackHazardSize(const MachineFunction &MF) { - return MF.getSubtarget<AArch64Subtarget>().getStreamingHazardSize(); -} - void AArch64FrameLowering::emitPacRetPlusLeafHardening( MachineFunction &MF) const { const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); @@ -1212,7 +1231,9 @@ AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF, const auto &MFI = MF.getFrameInfo(); int64_t ObjectOffset = MFI.getObjectOffset(FI); - StackOffset SVEStackSize = getSVEStackSize(MF); + StackOffset ZPRStackSize = getZPRStackSize(MF); + StackOffset PPRStackSize = getPPRStackSize(MF); + StackOffset SVEStackSize = ZPRStackSize + PPRStackSize; // For VLA-area objects, just emit an offset at the end of the stack frame. // Whilst not quite correct, these objects do live at the end of the frame and @@ -1313,7 +1334,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference( bool isCSR = !isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize(MFI)); - const StackOffset &SVEStackSize = getSVEStackSize(MF); + const StackOffset SVEStackSize = getSVEStackSize(MF); // Use frame pointer to reference fixed objects. Use it for locals if // there are VLAs or a dynamically realigned SP (and thus the SP isn't @@ -1615,10 +1636,13 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL, FirstReg = Count - 1; } bool FPAfterSVECalleeSaves = IsWindows && AFI->getSVECalleeSavedStackSize(); - int ScalableByteOffset = - FPAfterSVECalleeSaves ? 0 : AFI->getSVECalleeSavedStackSize(); + int ScalableByteOffset = FPAfterSVECalleeSaves + ? 0 + : AFI->getZPRCalleeSavedStackSize() + + AFI->getPPRCalleeSavedStackSize(); bool NeedGapToAlignStack = AFI->hasCalleeSaveStackFreeSpace(); Register LastReg = 0; + bool HasCSHazardPadding = AFI->hasStackHazardSlotIndex(); // When iterating backwards, the loop condition relies on unsigned wraparound. for (unsigned i = FirstReg; i < Count; i += RegInc) { @@ -1648,7 +1672,7 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL, } // Add the stack hazard size as we transition from GPR->FPR CSRs. - if (AFI->hasStackHazardSlotIndex() && + if (HasCSHazardPadding && (!LastReg || !AArch64InstrInfo::isFpOrNEON(LastReg)) && AArch64InstrInfo::isFpOrNEON(RPI.Reg1)) ByteOffset += StackFillDir * StackHazardSize; @@ -1656,7 +1680,7 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL, int Scale = TRI->getSpillSize(*RPI.RC); // Add the next reg to the pair if it is in the same register class. - if (unsigned(i + RegInc) < Count && !AFI->hasStackHazardSlotIndex()) { + if (unsigned(i + RegInc) < Count && !HasCSHazardPadding) { MCRegister NextReg = CSI[i + RegInc].getReg(); bool IsFirst = i == FirstReg; switch (RPI.Type) { @@ -2263,10 +2287,11 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, if (MF.getFunction().getCallingConv() == CallingConv::GHC) return; + const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>( MF.getSubtarget().getRegisterInfo()); - const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); unsigned UnspilledCSGPR = AArch64::NoRegister; unsigned UnspilledCSGPRPaired = AArch64::NoRegister; @@ -2387,15 +2412,19 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, // Calculates the callee saved stack size. unsigned CSStackSize = 0; - unsigned SVECSStackSize = 0; + unsigned ZPRCSStackSize = 0; + unsigned PPRCSStackSize = 0; const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); for (unsigned Reg : SavedRegs.set_bits()) { auto *RC = TRI->getMinimalPhysRegClass(Reg); assert(RC && "expected register class!"); auto SpillSize = TRI->getSpillSize(*RC); - if (AArch64::PPRRegClass.contains(Reg) || - AArch64::ZPRRegClass.contains(Reg)) - SVECSStackSize += SpillSize; + bool IsZPR = AArch64::ZPRRegClass.contains(Reg); + bool IsPPR = !IsZPR && AArch64::PPRRegClass.contains(Reg); + if (IsZPR || (IsPPR && arePPRsSpilledAsZPR(MF))) + ZPRCSStackSize += SpillSize; + else if (IsPPR) + PPRCSStackSize += SpillSize; else CSStackSize += SpillSize; } @@ -2405,17 +2434,17 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, // only 64-bit GPRs can be added to SavedRegs. unsigned NumSavedRegs = SavedRegs.count(); - // Increase the callee-saved stack size if the function has streaming mode - // changes, as we will need to spill the value of the VG register. - if (requiresSaveVG(MF)) - CSStackSize += 8; - // Determine if a Hazard slot should be used, and increase the CSStackSize by // StackHazardSize if so. determineStackHazardSlot(MF, SavedRegs); if (AFI->hasStackHazardSlotIndex()) CSStackSize += getStackHazardSize(MF); + // Increase the callee-saved stack size if the function has streaming mode + // changes, as we will need to spill the value of the VG register. + if (requiresSaveVG(MF)) + CSStackSize += 8; + // If we must call __arm_get_current_vg in the prologue preserve the LR. if (requiresSaveVG(MF) && !Subtarget.hasSVE()) SavedRegs.set(AArch64::LR); @@ -2436,8 +2465,11 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, }); // If any callee-saved registers are used, the frame cannot be eliminated. - int64_t SVEStackSize = - alignTo(SVECSStackSize + estimateSVEStackObjectOffsets(MFI), 16); + auto [ZPRLocalStackSize, PPRLocalStackSize] = + determineSVEStackSizes(MF, AssignObjectOffsets::No); + uint64_t SVELocals = ZPRLocalStackSize + PPRLocalStackSize; + uint64_t SVEStackSize = + alignTo(ZPRCSStackSize + PPRCSStackSize + SVELocals, 16); bool CanEliminateFrame = (SavedRegs.count() == 0) && !SVEStackSize; // The CSR spill slots have not been allocated yet, so estimateStackSize @@ -2522,7 +2554,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, // instructions. AFI->setCalleeSavedStackSize(AlignedCSStackSize); AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize); - AFI->setSVECalleeSavedStackSize(alignTo(SVECSStackSize, 16)); + AFI->setSVECalleeSavedStackSize(ZPRCSStackSize, alignTo(PPRCSStackSize, 16)); } bool AArch64FrameLowering::assignCalleeSavedSpillSlots( @@ -2661,7 +2693,6 @@ static bool getSVECalleeSaveSlotRange(const MachineFrameInfo &MFI, assert((Max == std::numeric_limits<int>::min() || Max + 1 == CS.getFrameIdx()) && "SVE CalleeSaves are not consecutive"); - Min = std::min(Min, CS.getFrameIdx()); Max = std::max(Max, CS.getFrameIdx()); } @@ -2669,15 +2700,21 @@ static bool getSVECalleeSaveSlotRange(const MachineFrameInfo &MFI, return Min != std::numeric_limits<int>::max(); } -// Process all the SVE stack objects and determine offsets for each -// object. If AssignOffsets is true, the offsets get assigned. -// Fills in the first and last callee-saved frame indices into -// Min/MaxCSFrameIndex, respectively. -// Returns the size of the stack. -static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI, - int &MinCSFrameIndex, - int &MaxCSFrameIndex, - bool AssignOffsets) { +static SVEStackSizes determineSVEStackSizes(MachineFunction &MF, + AssignObjectOffsets AssignOffsets, + bool SplitSVEObjects) { + MachineFrameInfo &MFI = MF.getFrameInfo(); + auto *AFI = MF.getInfo<AArch64FunctionInfo>(); + + SVEStackSizes SVEStack{}; + + // With SplitSVEObjects we maintain separate stack offsets for predicates + // (PPRs) and SVE vectors (ZPRs). When SplitSVEObjects is disabled predicates + // are included in the SVE vector area. + uint64_t &ZPRStackTop = SVEStack.ZPRStackSize; + uint64_t &PPRStackTop = + SplitSVEObjects ? SVEStack.PPRStackSize : SVEStack.ZPRStackSize; + #ifndef NDEBUG // First process all fixed stack objects. for (int I = MFI.getObjectIndexBegin(); I != 0; ++I) @@ -2686,26 +2723,42 @@ static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI, "reference."); #endif - auto Assign = [&MFI](int FI, int64_t Offset) { + auto AllocateObject = [&](int FI) { + uint64_t &StackTop = MFI.getStackID(FI) == TargetStackID::ScalableVector + ? ZPRStackTop + : PPRStackTop; + + // FIXME: Given that the length of SVE vectors is not necessarily a power of + // two, we'd need to align every object dynamically at runtime if the + // alignment is larger than 16. This is not yet supported. + Align Alignment = MFI.getObjectAlign(FI); + if (Alignment > Align(16)) + report_fatal_error( + "Alignment of scalable vectors > 16 bytes is not yet supported"); + + StackTop += MFI.getObjectSize(FI); + StackTop = alignTo(StackTop, Alignment); + + assert(StackTop < std::numeric_limits<int64_t>::max() && + "SVE StackTop far too large?!"); + + int64_t Offset = -int64_t(StackTop); + if (AssignOffsets == AssignObjectOffsets::Yes) + MFI.setObjectOffset(FI, Offset); + LLVM_DEBUG(dbgs() << "alloc FI(" << FI << ") at SP[" << Offset << "]\n"); - MFI.setObjectOffset(FI, Offset); }; - int64_t Offset = 0; - // Then process all callee saved slots. + int MinCSFrameIndex, MaxCSFrameIndex; if (getSVECalleeSaveSlotRange(MFI, MinCSFrameIndex, MaxCSFrameIndex)) { - // Assign offsets to the callee save slots. - for (int I = MinCSFrameIndex; I <= MaxCSFrameIndex; ++I) { - Offset += MFI.getObjectSize(I); - Offset = alignTo(Offset, MFI.getObjectAlign(I)); - if (AssignOffsets) - Assign(I, -Offset); - } + for (int FI = MinCSFrameIndex; FI <= MaxCSFrameIndex; ++FI) + AllocateObject(FI); } - // Ensure that the Callee-save area is aligned to 16bytes. - Offset = alignTo(Offset, Align(16U)); + // Ensure the CS area is 16-byte aligned. + PPRStackTop = alignTo(PPRStackTop, Align(16U)); + ZPRStackTop = alignTo(ZPRStackTop, Align(16U)); // Create a buffer of SVE objects to allocate and sort it. SmallVector<int, 8> ObjectsToAllocate; @@ -2715,50 +2768,34 @@ static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI, int StackProtectorFI = -1; if (MFI.hasStackProtectorIndex()) { StackProtectorFI = MFI.getStackProtectorIndex(); - if (MFI.isScalableStackID(StackProtectorFI)) + if (MFI.getStackID(StackProtectorFI) == TargetStackID::ScalableVector) ObjectsToAllocate.push_back(StackProtectorFI); } - for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) { - if (!MFI.isScalableStackID(I)) - continue; - if (I == StackProtectorFI) + + for (int FI = 0, E = MFI.getObjectIndexEnd(); FI != E; ++FI) { + if (FI == StackProtectorFI || MFI.isDeadObjectIndex(FI)) continue; - if (MaxCSFrameIndex >= I && I >= MinCSFrameIndex) + if (MaxCSFrameIndex >= FI && FI >= MinCSFrameIndex) continue; - if (MFI.isDeadObjectIndex(I)) + + if (MFI.getStackID(FI) != TargetStackID::ScalableVector && + MFI.getStackID(FI) != TargetStackID::ScalablePredicateVector) continue; - ObjectsToAllocate.push_back(I); + ObjectsToAllocate.push_back(FI); } // Allocate all SVE locals and spills - for (unsigned FI : ObjectsToAllocate) { - Align Alignment = MFI.getObjectAlign(FI); - // FIXME: Given that the length of SVE vectors is not necessarily a power of - // two, we'd need to align every object dynamically at runtime if the - // alignment is larger than 16. This is not yet supported. - if (Alignment > Align(16)) - report_fatal_error( - "Alignment of scalable vectors > 16 bytes is not yet supported"); + for (unsigned FI : ObjectsToAllocate) + AllocateObject(FI); - Offset = alignTo(Offset + MFI.getObjectSize(FI), Alignment); - if (AssignOffsets) - Assign(FI, -Offset); - } + PPRStackTop = alignTo(PPRStackTop, Align(16U)); + ZPRStackTop = alignTo(ZPRStackTop, Align(16U)); - return Offset; -} + if (AssignOffsets == AssignObjectOffsets::Yes) + AFI->setStackSizeSVE(SVEStack.ZPRStackSize, SVEStack.PPRStackSize); -int64_t AArch64FrameLowering::estimateSVEStackObjectOffsets( - MachineFrameInfo &MFI) const { - int MinCSFrameIndex, MaxCSFrameIndex; - return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex, false); -} - -int64_t AArch64FrameLowering::assignSVEStackObjectOffsets( - MachineFrameInfo &MFI, int &MinCSFrameIndex, int &MaxCSFrameIndex) const { - return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex, - true); + return SVEStack; } /// Attempts to scavenge a register from \p ScavengeableRegs given the used @@ -3072,12 +3109,7 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized( assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown && "Upwards growing stack unsupported"); - int MinCSFrameIndex, MaxCSFrameIndex; - int64_t SVEStackSize = - assignSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex); - - AFI->setStackSizeSVE(alignTo(SVEStackSize, 16U)); - AFI->setMinMaxSVECSFrameIndex(MinCSFrameIndex, MaxCSFrameIndex); + (void)determineSVEStackSizes(MF, AssignObjectOffsets::Yes); // If this function isn't doing Win64-style C++ EH, we don't need to do // anything. @@ -3599,7 +3631,7 @@ StackOffset AArch64FrameLowering::getFrameIndexReferencePreferSP( // Go to common code if we cannot provide sp + offset. if (MFI.hasVarSizedObjects() || - MF.getInfo<AArch64FunctionInfo>()->getStackSizeSVE() || + MF.getInfo<AArch64FunctionInfo>()->hasSVEStackSize() || MF.getSubtarget().getRegisterInfo()->hasStackRealignment(MF)) return getFrameIndexReference(MF, FI, FrameReg); |