author | Benjamin Maxwell <benjamin.maxwell@arm.com> | 2025-10-02 19:05:14 +0100
committer | GitHub <noreply@github.com> | 2025-10-02 19:05:14 +0100
commit | 8f67cdd9b7f4ffa3cca552b00d58e72dba66b924 (patch)
tree | b4b257e477c351c1ba26ef3b54b21abcb1ec7307 /llvm/lib
parent | b86ddae1da651f921125a9864b45a5b11bc3b1c0 (diff)
[AArch64][SME] Support split ZPR and PPR area allocation (#142392)
For a while we have supported the `-aarch64-stack-hazard-size=<size>`
option, which adds "hazard padding" between GPRs and FPR/ZPRs. However,
there is currently a hole in this mitigation as PPR and FPR/ZPR accesses
to the same area also cause streaming memory hazards (this is noted by
`-pass-remarks-analysis=sme -aarch64-stack-hazard-remark-size=<val>`),
and the current stack layout places PPRs and ZPRs within the same area,
which looks like this:
```
------------------------------------ Higher address
| callee-saved gpr registers |
|-----------------------------------|
| lr,fp (a.k.a. "frame record") |
|-----------------------------------| <- fp(=x29)
| <hazard padding> |
|-----------------------------------|
| callee-saved fp/simd/SVE regs |
|-----------------------------------|
| SVE stack objects |
|-----------------------------------|
| local variables of fixed size |
| <FPR> |
| <hazard padding> |
| <GPR> |
------------------------------------| <- sp
| Lower address
```
With this patch, the stack (and hazard padding) is rearranged so that the
hazard padding is placed between the PPRs and ZPRs rather than within the
(fixed-size) callee-save region. The new layout looks like this (a small
sketch of the resulting frame-offset arithmetic follows the diagram):
```
------------------------------------ Higher address
| callee-saved gpr registers |
|-----------------------------------|
| lr,fp (a.k.a. "frame record") |
|-----------------------------------| <- fp(=x29)
| callee-saved PPRs |
| PPR stack objects | (These are SVE predicates)
|-----------------------------------|
| <hazard padding> |
|-----------------------------------|
| callee-saved ZPR regs | (These are SVE vectors)
| ZPR stack objects | Note: FPRs are promoted to ZPRs
|-----------------------------------|
| local variables of fixed size |
| <FPR> |
| <hazard padding> |
| <GPR> |
------------------------------------| <- sp
| Lower address
```
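To make the consequence of the split concrete, here is a minimal standalone sketch of how an FP-relative offset to an SVE stack object is adjusted under the new layout: a ZPR object sits below both the PPR area and the hazard padding, so both have to be skipped, while a PPR object is reached directly below the frame record. This is only an illustration — the `Offset` struct and `fpRelativeOffset` function are made up; the patch does the equivalent adjustment inside `resolveFrameOffsetReference`/`getFrameIndexReferenceFromSP` using LLVM's `StackOffset`.
```cpp
#include <cstdint>
#include <cstdio>

// A (fixed bytes, scalable bytes) pair, mirroring the idea behind LLVM's
// StackOffset. The scalable part is multiplied by the SVE vector-length
// factor at runtime.
struct Offset {
  int64_t Fixed = 0;
  int64_t Scalable = 0;
};

// ObjectOffsetScalable is the object's offset within its own area (PPR or
// ZPR area), as a negative scalable byte count below the top of that area.
Offset fpRelativeOffset(bool IsZPRObject, int64_t ObjectOffsetScalable,
                        int64_t PPRAreaScalable, int64_t HazardPaddingBytes) {
  Offset O;
  O.Scalable = ObjectOffsetScalable;
  if (IsZPRObject) {
    // ZPR objects are below (at lower addresses than) the whole PPR area and
    // the hazard padding, so both must be skipped when addressing from FP.
    O.Scalable -= PPRAreaScalable;
    O.Fixed -= HazardPaddingBytes;
  }
  return O;
}

int main() {
  // Example values (made up): first ZPR slot at -16 scalable bytes, 32
  // scalable bytes of PPRs, 1024 bytes of hazard padding.
  Offset O = fpRelativeOffset(/*IsZPRObject=*/true, -16, 32, 1024);
  std::printf("fixed %lld, scalable %lld\n", (long long)O.Fixed,
              (long long)O.Scalable);
}
```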
This layout is only enabled if all of the following hold (a sketch of the combined gating check follows this list):
* SplitSVEObjects are enabled (`-aarch64-split-sve-objects`)
- (This may be enabled by default in a later patch)
* Streaming memory hazards are present
- (`-aarch64-stack-hazard-size=<val>` != 0)
* PPRs and FPRs/ZPRs are on the stack
* There's no stack realignment or variable-sized objects
- This is left as a TODO for now
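Taken together, the conditions above amount to a single gating predicate. Below is a minimal self-contained sketch of that check — it is not the actual LLVM code, and the `FrameFacts` struct and `shouldUseSplitSVEObjects` name are made up for illustration.
```cpp
#include <cstdio>

struct FrameFacts {
  bool SplitSVEObjectsFlag;   // -aarch64-split-sve-objects
  unsigned StackHazardSize;   // -aarch64-stack-hazard-size=<val>
  bool HasPPRsOnStack;        // PPR callee-saves or PPR stack objects
  bool HasZPRsOrFPRsOnStack;  // ZPR/FPR callee-saves or stack objects
  bool HasVarSizedObjects;
  bool NeedsStackRealignment;
};

bool shouldUseSplitSVEObjects(const FrameFacts &F) {
  // The real check in the patch additionally restricts the calling
  // convention and rejects -aarch64-enable-zpr-predicate-spills.
  return F.SplitSVEObjectsFlag && F.StackHazardSize != 0 &&
         F.HasPPRsOnStack && F.HasZPRsOrFPRsOnStack &&
         // Realignment and variable-sized objects are a TODO for now.
         !F.HasVarSizedObjects && !F.NeedsStackRealignment;
}

int main() {
  FrameFacts F{/*SplitSVEObjectsFlag=*/true, /*StackHazardSize=*/1024,
               /*HasPPRsOnStack=*/true, /*HasZPRsOrFPRsOnStack=*/true,
               /*HasVarSizedObjects=*/false, /*NeedsStackRealignment=*/false};
  std::printf("split SVE objects: %s\n",
              shouldUseSplitSVEObjects(F) ? "enabled" : "disabled");
}
```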
Additionally, any FPR callee-saves that are present are promoted to ZPRs.
This prevents stack hazards between FPRs and GPRs in the fixed-size
callee-save area (which would otherwise require more hazard padding, or
moving the FPR callee-saves). A short sketch of this promotion follows.
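As a rough illustration of that promotion (this is not the actual implementation, which rewrites the `SavedRegs` bit vector via `getMatchingSuperReg` with the `dsub`/`zsub` sub-register indices; here we simply map register names): each callee-saved dN or qN is replaced in the save list by the zN register that contains it, so the save lands in the ZPR area.
```cpp
#include <cstdio>
#include <string>

// dN (FPR64) and qN (FPR128) are sub-registers of zN, so saving zN also
// preserves the FPR value.
std::string promoteToZPR(const std::string &Reg) {
  if (Reg[0] == 'd' || Reg[0] == 'q')
    return "z" + Reg.substr(1);
  return Reg; // not an FPR callee-save; left unchanged
}

int main() {
  // The AAPCS callee-saved FPRs are d8-d15.
  for (int N = 8; N <= 15; ++N) {
    std::string D = "d" + std::to_string(N);
    std::printf("%s -> %s\n", D.c_str(), promoteToZPR(D).c_str());
  }
}
```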
This layout should close the hole in the hazard-padding mitigation, and it
is not intended to change codegen for non-SME code.
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp | 2
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp | 3
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 297
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64FrameLowering.h | 8
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 4
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 4
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h | 20
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp | 125
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp | 11
9 files changed, 384 insertions, 90 deletions
diff --git a/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp b/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp index ec75dc3..64e5cd5 100644 --- a/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp +++ b/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp @@ -72,7 +72,7 @@ struct StackFrameLayoutAnalysis { : Slot(Idx), Size(MFI.getObjectSize(Idx)), Align(MFI.getObjectAlign(Idx).value()), Offset(Offset), SlotTy(Invalid), Scalable(false) { - Scalable = MFI.isScalableStackID(Idx); + Scalable = MFI.hasScalableStackID(Idx); if (MFI.isSpillSlotObjectIndex(Idx)) SlotTy = SlotType::Spill; else if (MFI.isFixedObjectIndex(Idx)) diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp index 79655e1..0f4bbfc3 100644 --- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -1610,7 +1610,8 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, int BaseOffset = -AFI->getTaggedBasePointerOffset(); Register FrameReg; StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference( - MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg, + MF, BaseOffset, false /*isFixed*/, TargetStackID::Default /*StackID*/, + FrameReg, /*PreferFP=*/false, /*ForSimm=*/true); Register SrcReg = FrameReg; diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index f5f7b65..8d6eb91 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -56,15 +56,20 @@ // | async context if needed | // | (a.k.a. "frame record") | // |-----------------------------------| <- fp(=x29) -// | <hazard padding> | -// |-----------------------------------| -// | | -// | callee-saved fp/simd/SVE regs | -// | | -// |-----------------------------------| -// | | -// | SVE stack objects | -// | | +// Default SVE stack layout Split SVE objects +// (aarch64-split-sve-objects=false) (aarch64-split-sve-objects=true) +// |-----------------------------------| |-----------------------------------| +// | <hazard padding> | | callee-saved PPR registers | +// |-----------------------------------| |-----------------------------------| +// | | | PPR stack objects | +// | callee-saved fp/simd/SVE regs | |-----------------------------------| +// | | | <hazard padding> | +// |-----------------------------------| |-----------------------------------| +// | | | callee-saved ZPR/FPR registers | +// | SVE stack objects | |-----------------------------------| +// | | | ZPR stack objects | +// |-----------------------------------| |-----------------------------------| +// ^ NB: FPR CSRs are promoted to ZPRs // |-----------------------------------| // |.empty.space.to.make.part.below....| // |.aligned.in.case.it.needs.more.than| (size of this area is unknown at @@ -274,6 +279,11 @@ static cl::opt<bool> OrderFrameObjects("aarch64-order-frame-objects", cl::desc("sort stack allocations"), cl::init(true), cl::Hidden); +static cl::opt<bool> + SplitSVEObjects("aarch64-split-sve-objects", + cl::desc("Split allocation of ZPR & PPR objects"), + cl::init(false), cl::Hidden); + cl::opt<bool> EnableHomogeneousPrologEpilog( "homogeneous-prolog-epilog", cl::Hidden, cl::desc("Emit homogeneous prologue and epilogue for the size " @@ -329,8 +339,7 @@ enum class AssignObjectOffsets { No, Yes }; /// each object. If AssignOffsets is "Yes", the offsets get assigned (and SVE /// stack sizes set). Returns the size of the SVE stack. 
static SVEStackSizes determineSVEStackSizes(MachineFunction &MF, - AssignObjectOffsets AssignOffsets, - bool SplitSVEObjects = false); + AssignObjectOffsets AssignOffsets); static unsigned getStackHazardSize(const MachineFunction &MF) { return MF.getSubtarget<AArch64Subtarget>().getStreamingHazardSize(); @@ -350,8 +359,13 @@ AArch64FrameLowering::getZPRStackSize(const MachineFunction &MF) const { StackOffset AArch64FrameLowering::getPPRStackSize(const MachineFunction &MF) const { + // With split SVE objects, the hazard padding is added to the PPR region, + // which places it between the [GPR, PPR] area and the [ZPR, FPR] area. This + // avoids hazards between both GPRs and FPRs and ZPRs and PPRs. const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); - return StackOffset::getScalable(AFI->getStackSizePPR()); + return StackOffset::get(AFI->hasSplitSVEObjects() ? getStackHazardSize(MF) + : 0, + AFI->getStackSizePPR()); } // Conservatively, returns true if the function is likely to have SVE vectors @@ -368,7 +382,7 @@ static bool isLikelyToHaveSVEStack(const AArch64FrameLowering &AFL, const MachineFrameInfo &MFI = MF.getFrameInfo(); for (int FI = MFI.getObjectIndexBegin(); FI < MFI.getObjectIndexEnd(); FI++) { - if (MFI.isScalableStackID(FI)) + if (MFI.hasScalableStackID(FI)) return true; } @@ -1249,11 +1263,21 @@ AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF, const auto *AFI = MF.getInfo<AArch64FunctionInfo>(); bool FPAfterSVECalleeSaves = isTargetWindows(MF) && AFI->getSVECalleeSavedStackSize(); - if (MFI.isScalableStackID(FI)) { + if (MFI.hasScalableStackID(FI)) { if (FPAfterSVECalleeSaves && - -ObjectOffset <= (int64_t)AFI->getSVECalleeSavedStackSize()) + -ObjectOffset <= (int64_t)AFI->getSVECalleeSavedStackSize()) { + assert(!AFI->hasSplitSVEObjects() && + "split-sve-objects not supported with FPAfterSVECalleeSaves"); return StackOffset::getScalable(ObjectOffset); - return StackOffset::get(-((int64_t)AFI->getCalleeSavedStackSize()), + } + StackOffset AccessOffset{}; + // The scalable vectors are below (lower address) the scalable predicates + // with split SVE objects, so we must subtract the size of the predicates. 
+ if (AFI->hasSplitSVEObjects() && + MFI.getStackID(FI) == TargetStackID::ScalableVector) + AccessOffset = -PPRStackSize; + return AccessOffset + + StackOffset::get(-((int64_t)AFI->getCalleeSavedStackSize()), ObjectOffset); } @@ -1315,14 +1339,15 @@ StackOffset AArch64FrameLowering::resolveFrameIndexReference( const auto &MFI = MF.getFrameInfo(); int64_t ObjectOffset = MFI.getObjectOffset(FI); bool isFixed = MFI.isFixedObjectIndex(FI); - bool isSVE = MFI.isScalableStackID(FI); - return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, isSVE, FrameReg, - PreferFP, ForSimm); + auto StackID = static_cast<TargetStackID::Value>(MFI.getStackID(FI)); + return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, StackID, + FrameReg, PreferFP, ForSimm); } StackOffset AArch64FrameLowering::resolveFrameOffsetReference( - const MachineFunction &MF, int64_t ObjectOffset, bool isFixed, bool isSVE, - Register &FrameReg, bool PreferFP, bool ForSimm) const { + const MachineFunction &MF, int64_t ObjectOffset, bool isFixed, + TargetStackID::Value StackID, Register &FrameReg, bool PreferFP, + bool ForSimm) const { const auto &MFI = MF.getFrameInfo(); const auto *RegInfo = static_cast<const AArch64RegisterInfo *>( MF.getSubtarget().getRegisterInfo()); @@ -1333,8 +1358,11 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference( int64_t Offset = getStackOffset(MF, ObjectOffset).getFixed(); bool isCSR = !isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize(MFI)); + bool isSVE = MFI.isScalableStackID(StackID); - const StackOffset SVEStackSize = getSVEStackSize(MF); + StackOffset ZPRStackSize = getZPRStackSize(MF); + StackOffset PPRStackSize = getPPRStackSize(MF); + StackOffset SVEStackSize = ZPRStackSize + PPRStackSize; // Use frame pointer to reference fixed objects. Use it for locals if // there are VLAs or a dynamically realigned SP (and thus the SP isn't @@ -1409,12 +1437,25 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference( isTargetWindows(MF) && AFI->getSVECalleeSavedStackSize(); if (isSVE) { - StackOffset FPOffset = - StackOffset::get(-AFI->getCalleeSaveBaseToFrameRecordOffset(), ObjectOffset); + StackOffset FPOffset = StackOffset::get( + -AFI->getCalleeSaveBaseToFrameRecordOffset(), ObjectOffset); StackOffset SPOffset = SVEStackSize + StackOffset::get(MFI.getStackSize() - AFI->getCalleeSavedStackSize(), ObjectOffset); + + // With split SVE objects the ObjectOffset is relative to the split area + // (i.e. the PPR area or ZPR area respectively). + if (AFI->hasSplitSVEObjects() && StackID == TargetStackID::ScalableVector) { + // If we're accessing an SVE vector with split SVE objects... + // - From the FP we need to move down past the PPR area: + FPOffset -= PPRStackSize; + // - From the SP we only need to move up to the ZPR area: + SPOffset -= PPRStackSize; + // Note: `SPOffset = SVEStackSize + ...`, so `-= PPRStackSize` results in + // `SPOffset = ZPRStackSize + ...`. + } + if (FPAfterSVECalleeSaves) { FPOffset += StackOffset::getScalable(AFI->getSVECalleeSavedStackSize()); if (-ObjectOffset <= (int64_t)AFI->getSVECalleeSavedStackSize()) { @@ -1422,6 +1463,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference( SPOffset += StackOffset::getFixed(AFI->getCalleeSavedStackSize()); } } + // Always use the FP for SVE spills if available and beneficial. 
if (hasFP(MF) && (SPOffset.getFixed() || FPOffset.getScalable() < SPOffset.getScalable() || @@ -1429,13 +1471,13 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference( FrameReg = RegInfo->getFrameRegister(MF); return FPOffset; } - FrameReg = RegInfo->hasBasePointer(MF) ? RegInfo->getBaseRegister() : (unsigned)AArch64::SP; + return SPOffset; } - StackOffset ScalableOffset = {}; + StackOffset SVEAreaOffset = {}; if (FPAfterSVECalleeSaves) { // In this stack layout, the FP is in between the callee saves and other // SVE allocations. @@ -1443,25 +1485,25 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference( StackOffset::getScalable(AFI->getSVECalleeSavedStackSize()); if (UseFP) { if (isFixed) - ScalableOffset = SVECalleeSavedStack; + SVEAreaOffset = SVECalleeSavedStack; else if (!isCSR) - ScalableOffset = SVECalleeSavedStack - SVEStackSize; + SVEAreaOffset = SVECalleeSavedStack - SVEStackSize; } else { if (isFixed) - ScalableOffset = SVEStackSize; + SVEAreaOffset = SVEStackSize; else if (isCSR) - ScalableOffset = SVEStackSize - SVECalleeSavedStack; + SVEAreaOffset = SVEStackSize - SVECalleeSavedStack; } } else { if (UseFP && !(isFixed || isCSR)) - ScalableOffset = -SVEStackSize; + SVEAreaOffset = -SVEStackSize; if (!UseFP && (isFixed || isCSR)) - ScalableOffset = SVEStackSize; + SVEAreaOffset = SVEStackSize; } if (UseFP) { FrameReg = RegInfo->getFrameRegister(MF); - return StackOffset::getFixed(FPOffset) + ScalableOffset; + return StackOffset::getFixed(FPOffset) + SVEAreaOffset; } // Use the base pointer if we have one. @@ -1478,7 +1520,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference( Offset -= AFI->getLocalStackSize(); } - return StackOffset::getFixed(Offset) + ScalableOffset; + return StackOffset::getFixed(Offset) + SVEAreaOffset; } static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) { @@ -1635,14 +1677,25 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL, RegInc = -1; FirstReg = Count - 1; } + bool FPAfterSVECalleeSaves = IsWindows && AFI->getSVECalleeSavedStackSize(); - int ScalableByteOffset = FPAfterSVECalleeSaves - ? 0 - : AFI->getZPRCalleeSavedStackSize() + - AFI->getPPRCalleeSavedStackSize(); + + int ZPRByteOffset = 0; + int PPRByteOffset = 0; + bool SplitPPRs = AFI->hasSplitSVEObjects(); + if (SplitPPRs) { + ZPRByteOffset = AFI->getZPRCalleeSavedStackSize(); + PPRByteOffset = AFI->getPPRCalleeSavedStackSize(); + } else if (!FPAfterSVECalleeSaves) { + ZPRByteOffset = + AFI->getZPRCalleeSavedStackSize() + AFI->getPPRCalleeSavedStackSize(); + // Unused: Everything goes in ZPR space. + PPRByteOffset = 0; + } + bool NeedGapToAlignStack = AFI->hasCalleeSaveStackFreeSpace(); Register LastReg = 0; - bool HasCSHazardPadding = AFI->hasStackHazardSlotIndex(); + bool HasCSHazardPadding = AFI->hasStackHazardSlotIndex() && !SplitPPRs; // When iterating backwards, the loop condition relies on unsigned wraparound. for (unsigned i = FirstReg; i < Count; i += RegInc) { @@ -1671,6 +1724,10 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL, llvm_unreachable("Unsupported register class."); } + int &ScalableByteOffset = RPI.Type == RegPairInfo::PPR && SplitPPRs + ? PPRByteOffset + : ZPRByteOffset; + // Add the stack hazard size as we transition from GPR->FPR CSRs. 
if (HasCSHazardPadding && (!LastReg || !AArch64InstrInfo::isFpOrNEON(LastReg)) && @@ -2227,6 +2284,13 @@ static std::optional<int> getLdStFrameID(const MachineInstr &MI, return getMMOFrameID(*MI.memoperands_begin(), MFI); } +// Returns true if the LDST MachineInstr \p MI is a PPR access. +static bool isPPRAccess(const MachineInstr &MI) { + return MI.getOpcode() != AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO && + MI.getOpcode() != AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO && + AArch64::PPRRegClass.contains(MI.getOperand(0).getReg()); +} + // Check if a Hazard slot is needed for the current function, and if so create // one for it. The index is stored in AArch64FunctionInfo->StackHazardSlotIndex, // which can be used to determine if any hazard padding is needed. @@ -2250,25 +2314,50 @@ void AArch64FrameLowering::determineStackHazardSlot( bool HasFPRCSRs = any_of(SavedRegs.set_bits(), [](unsigned Reg) { return AArch64::FPR64RegClass.contains(Reg) || AArch64::FPR128RegClass.contains(Reg) || - AArch64::ZPRRegClass.contains(Reg) || - AArch64::PPRRegClass.contains(Reg); + AArch64::ZPRRegClass.contains(Reg); + }); + bool HasPPRCSRs = any_of(SavedRegs.set_bits(), [](unsigned Reg) { + return AArch64::PPRRegClass.contains(Reg); }); bool HasFPRStackObjects = false; - if (!HasFPRCSRs) { - std::vector<unsigned> FrameObjects(MFI.getObjectIndexEnd()); + bool HasPPRStackObjects = false; + if (!HasFPRCSRs || SplitSVEObjects) { + enum SlotType : uint8_t { + Unknown = 0, + ZPRorFPR = 1 << 0, + PPR = 1 << 1, + GPR = 1 << 2, + LLVM_MARK_AS_BITMASK_ENUM(GPR) + }; + + // Find stack slots solely used for one kind of register (ZPR, PPR, etc.), + // based on the kinds of accesses used in the function. + SmallVector<SlotType> SlotTypes(MFI.getObjectIndexEnd(), SlotType::Unknown); for (auto &MBB : MF) { for (auto &MI : MBB) { std::optional<int> FI = getLdStFrameID(MI, MFI); - if (FI && *FI >= 0 && *FI < (int)FrameObjects.size()) { - if (MFI.isScalableStackID(*FI) || AArch64InstrInfo::isFpOrNEON(MI)) - FrameObjects[*FI] |= 2; - else - FrameObjects[*FI] |= 1; + if (!FI || FI < 0 || FI > int(SlotTypes.size())) + continue; + if (MFI.hasScalableStackID(*FI)) { + SlotTypes[*FI] |= + isPPRAccess(MI) ? SlotType::PPR : SlotType::ZPRorFPR; + } else { + SlotTypes[*FI] |= AArch64InstrInfo::isFpOrNEON(MI) + ? SlotType::ZPRorFPR + : SlotType::GPR; } } } - HasFPRStackObjects = - any_of(FrameObjects, [](unsigned B) { return (B & 3) == 2; }); + + for (int FI = 0; FI < int(SlotTypes.size()); ++FI) { + HasFPRStackObjects |= SlotTypes[FI] == SlotType::ZPRorFPR; + // For SplitSVEObjects remember that this stack slot is a predicate, this + // will be needed later when determining the frame layout. + if (SlotTypes[FI] == SlotType::PPR) { + MFI.setStackID(FI, TargetStackID::ScalablePredicateVector); + HasPPRStackObjects = true; + } + } } if (HasFPRCSRs || HasFPRStackObjects) { @@ -2277,6 +2366,78 @@ void AArch64FrameLowering::determineStackHazardSlot( << StackHazardSize << "\n"); AFI->setStackHazardSlotIndex(ID); } + + // Determine if we should use SplitSVEObjects. This should only be used if + // there's a possibility of a stack hazard between PPRs and ZPRs or FPRs. 
+ if (SplitSVEObjects) { + if (!HasPPRCSRs && !HasPPRStackObjects) { + LLVM_DEBUG( + dbgs() << "Not using SplitSVEObjects as no PPRs are on the stack\n"); + return; + } + + if (!HasFPRCSRs && !HasFPRStackObjects) { + LLVM_DEBUG( + dbgs() + << "Not using SplitSVEObjects as no FPRs or ZPRs are on the stack\n"); + return; + } + + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + if (MFI.hasVarSizedObjects() || TRI->hasStackRealignment(MF)) { + LLVM_DEBUG(dbgs() << "SplitSVEObjects is not supported with variable " + "sized objects or realignment\n"); + return; + } + + if (arePPRsSpilledAsZPR(MF)) { + LLVM_DEBUG(dbgs() << "SplitSVEObjects is not supported with " + "-aarch64-enable-zpr-predicate-spills"); + return; + } + + // If another calling convention is explicitly set FPRs can't be promoted to + // ZPR callee-saves. + if (!is_contained({CallingConv::C, CallingConv::Fast, + CallingConv::AArch64_SVE_VectorCall}, + MF.getFunction().getCallingConv())) { + LLVM_DEBUG( + dbgs() << "Calling convention is not supported with SplitSVEObjects"); + return; + } + + [[maybe_unused]] const AArch64Subtarget &Subtarget = + MF.getSubtarget<AArch64Subtarget>(); + assert(Subtarget.isSVEorStreamingSVEAvailable() && + "Expected SVE to be available for PPRs"); + + // With SplitSVEObjects the CS hazard padding is placed between the + // PPRs and ZPRs. If there are any FPR CS there would be a hazard between + // them and the CS GRPs. Avoid this by promoting all FPR CS to ZPRs. + BitVector FPRZRegs(SavedRegs.size()); + for (size_t Reg = 0, E = SavedRegs.size(); HasFPRCSRs && Reg < E; ++Reg) { + BitVector::reference RegBit = SavedRegs[Reg]; + if (!RegBit) + continue; + unsigned SubRegIdx = 0; + if (AArch64::FPR64RegClass.contains(Reg)) + SubRegIdx = AArch64::dsub; + else if (AArch64::FPR128RegClass.contains(Reg)) + SubRegIdx = AArch64::zsub; + else + continue; + // Clear the bit for the FPR save. + RegBit = false; + // Mark that we should save the corresponding ZPR. + Register ZReg = + TRI->getMatchingSuperReg(Reg, SubRegIdx, &AArch64::ZPRRegClass); + FPRZRegs.set(ZReg); + } + SavedRegs |= FPRZRegs; + + AFI->setSplitSVEObjects(true); + LLVM_DEBUG(dbgs() << "SplitSVEObjects enabled!\n"); + } } void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, @@ -2410,6 +2571,11 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, SavedRegs.set(AArch64::X18); } + // Determine if a Hazard slot should be used and where it should go. + // If SplitSVEObjects is used, the hazard padding is placed between the PPRs + // and ZPRs. Otherwise, it goes in the callee save area. + determineStackHazardSlot(MF, SavedRegs); + // Calculates the callee saved stack size. unsigned CSStackSize = 0; unsigned ZPRCSStackSize = 0; @@ -2434,10 +2600,8 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, // only 64-bit GPRs can be added to SavedRegs. unsigned NumSavedRegs = SavedRegs.count(); - // Determine if a Hazard slot should be used, and increase the CSStackSize by - // StackHazardSize if so. - determineStackHazardSlot(MF, SavedRegs); - if (AFI->hasStackHazardSlotIndex()) + // If we have hazard padding in the CS area add that to the size. 
+ if (AFI->isStackHazardIncludedInCalleeSaveArea()) CSStackSize += getStackHazardSize(MF); // Increase the callee-saved stack size if the function has streaming mode @@ -2607,7 +2771,7 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots( const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg); // Create a hazard slot as we switch between GPR and FPR CSRs. - if (AFI->hasStackHazardSlotIndex() && + if (AFI->isStackHazardIncludedInCalleeSaveArea() && (!LastReg || !AArch64InstrInfo::isFpOrNEON(LastReg)) && AArch64InstrInfo::isFpOrNEON(Reg)) { assert(HazardSlotIndex == std::numeric_limits<int>::max() && @@ -2646,7 +2810,7 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots( } // Add hazard slot in the case where no FPR CSRs are present. - if (AFI->hasStackHazardSlotIndex() && + if (AFI->isStackHazardIncludedInCalleeSaveArea() && HazardSlotIndex == std::numeric_limits<int>::max()) { HazardSlotIndex = MFI.CreateStackObject(StackHazardSize, Align(8), true); LLVM_DEBUG(dbgs() << "Created CSR Hazard at slot " << HazardSlotIndex @@ -2701,8 +2865,7 @@ static bool getSVECalleeSaveSlotRange(const MachineFrameInfo &MFI, } static SVEStackSizes determineSVEStackSizes(MachineFunction &MF, - AssignObjectOffsets AssignOffsets, - bool SplitSVEObjects) { + AssignObjectOffsets AssignOffsets) { MachineFrameInfo &MFI = MF.getFrameInfo(); auto *AFI = MF.getInfo<AArch64FunctionInfo>(); @@ -2713,12 +2876,12 @@ static SVEStackSizes determineSVEStackSizes(MachineFunction &MF, // are included in the SVE vector area. uint64_t &ZPRStackTop = SVEStack.ZPRStackSize; uint64_t &PPRStackTop = - SplitSVEObjects ? SVEStack.PPRStackSize : SVEStack.ZPRStackSize; + AFI->hasSplitSVEObjects() ? SVEStack.PPRStackSize : SVEStack.ZPRStackSize; #ifndef NDEBUG // First process all fixed stack objects. 
for (int I = MFI.getObjectIndexBegin(); I != 0; ++I) - assert(!MFI.isScalableStackID(I) && + assert(!MFI.hasScalableStackID(I) && "SVE vectors should never be passed on the stack by value, only by " "reference."); #endif @@ -3393,7 +3556,8 @@ void TagStoreEdit::emitCode(MachineBasicBlock::iterator &InsertI, Register Reg; FrameRegOffset = TFI->resolveFrameOffsetReference( - *MF, FirstTagStore.Offset, false /*isFixed*/, false /*isSVE*/, Reg, + *MF, FirstTagStore.Offset, false /*isFixed*/, + TargetStackID::Default /*StackID*/, Reg, /*PreferFP=*/false, /*ForSimm=*/true); FrameReg = Reg; FrameRegUpdate = std::nullopt; @@ -3733,10 +3897,12 @@ bool FrameObjectCompare(const FrameObject &A, const FrameObject &B) { void AArch64FrameLowering::orderFrameObjects( const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const { - if (!OrderFrameObjects || ObjectsToAllocate.empty()) + const AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>(); + + if ((!OrderFrameObjects && !AFI.hasSplitSVEObjects()) || + ObjectsToAllocate.empty()) return; - const AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>(); const MachineFrameInfo &MFI = MF.getFrameInfo(); std::vector<FrameObject> FrameObjects(MFI.getObjectIndexEnd()); for (auto &Obj : ObjectsToAllocate) { @@ -3755,7 +3921,8 @@ void AArch64FrameLowering::orderFrameObjects( if (AFI.hasStackHazardSlotIndex()) { std::optional<int> FI = getLdStFrameID(MI, MFI); if (FI && *FI >= 0 && *FI < (int)FrameObjects.size()) { - if (MFI.isScalableStackID(*FI) || AArch64InstrInfo::isFpOrNEON(MI)) + if (MFI.getStackID(*FI) == TargetStackID::ScalableVector || + AArch64InstrInfo::isFpOrNEON(MI)) FrameObjects[*FI].Accesses |= FrameObject::AccessFPR; else FrameObjects[*FI].Accesses |= FrameObject::AccessGPR; @@ -4113,7 +4280,7 @@ void AArch64FrameLowering::emitRemarks( } unsigned RegTy = StackAccess::AccessType::GPR; - if (MFI.isScalableStackID(FrameIdx)) { + if (MFI.hasScalableStackID(FrameIdx)) { // SPILL_PPR_TO_ZPR_SLOT_PSEUDO and FILL_PPR_FROM_ZPR_SLOT_PSEUDO // spill/fill the predicate as a data vector (so are an FPR access). if (MI.getOpcode() != AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO && diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h index 38aa28b1..32a9bd8 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h @@ -69,8 +69,9 @@ public: bool ForSimm) const; StackOffset resolveFrameOffsetReference(const MachineFunction &MF, int64_t ObjectOffset, bool isFixed, - bool isSVE, Register &FrameReg, - bool PreferFP, bool ForSimm) const; + TargetStackID::Value StackID, + Register &FrameReg, bool PreferFP, + bool ForSimm) const; bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef<CalleeSavedInfo> CSI, @@ -155,7 +156,8 @@ public: /// Returns the size of the entire ZPR stackframe (calleesaves + spills). StackOffset getZPRStackSize(const MachineFunction &MF) const; - /// Returns the size of the entire PPR stackframe (calleesaves + spills). + /// Returns the size of the entire PPR stackframe (calleesaves + spills + + /// hazard padding). StackOffset getPPRStackSize(const MachineFunction &MF) const; /// Returns the size of the entire SVE stackframe (PPRs + ZPRs). 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 35bbb0c0..e7b2d20 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -7497,7 +7497,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, int FI = cast<FrameIndexSDNode>(N)->getIndex(); // We can only encode VL scaled offsets, so only fold in frame indexes // referencing SVE objects. - if (MFI.isScalableStackID(FI)) { + if (MFI.hasScalableStackID(FI)) { Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64); return true; @@ -7543,7 +7543,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, int FI = cast<FrameIndexSDNode>(Base)->getIndex(); // We can only encode VL scaled offsets, so only fold in frame indexes // referencing SVE objects. - if (MFI.isScalableStackID(FI)) + if (MFI.hasScalableStackID(FI)) Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index c2a482a..70d5ad7d 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -9256,7 +9256,7 @@ void AArch64TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, (MI.getOpcode() == AArch64::ADDXri || MI.getOpcode() == AArch64::SUBXri)) { const MachineOperand &MO = MI.getOperand(1); - if (MO.isFI() && MF.getFrameInfo().isScalableStackID(MO.getIndex())) + if (MO.isFI() && MF.getFrameInfo().hasScalableStackID(MO.getIndex())) MI.addOperand(MachineOperand::CreateReg(AArch64::VG, /*IsDef=*/false, /*IsImplicit=*/true)); } @@ -29608,7 +29608,7 @@ void AArch64TargetLowering::finalizeLowering(MachineFunction &MF) const { // than doing it here in finalizeLowering. if (MFI.hasStackProtectorIndex()) { for (unsigned int i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) { - if (MFI.isScalableStackID(i) && + if (MFI.hasScalableStackID(i) && MFI.getObjectSSPLayout(i) != MachineFrameInfo::SSPLK_None) { MFI.setStackID(MFI.getStackProtectorIndex(), TargetStackID::ScalableVector); diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h index 00c104e..91e64e6 100644 --- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -137,6 +137,10 @@ class AArch64FunctionInfo final : public MachineFunctionInfo { uint64_t StackSizeZPR = 0; uint64_t StackSizePPR = 0; + /// Are SVE objects (vectors and predicates) split into separate regions on + /// the stack. + bool SplitSVEObjects = false; + /// HasCalculatedStackSizeSVE indicates whether StackSizeZPR/PPR is valid. bool HasCalculatedStackSizeSVE = false; @@ -336,7 +340,6 @@ public: bool isStackRealigned() const { return StackRealigned; } void setStackRealigned(bool s) { StackRealigned = s; } - bool hasCalleeSaveStackFreeSpace() const { return CalleeSaveStackHasFreeSpace; } @@ -438,6 +441,8 @@ public: } unsigned getSVECalleeSavedStackSize() const { + assert(!hasSplitSVEObjects() && + "ZPRs and PPRs are split. 
Use get[ZPR|PPR]CalleeSavedStackSize()"); return getZPRCalleeSavedStackSize() + getPPRCalleeSavedStackSize(); } @@ -446,6 +451,10 @@ public: return NumLocalDynamicTLSAccesses; } + bool isStackHazardIncludedInCalleeSaveArea() const { + return hasStackHazardSlotIndex() && !hasSplitSVEObjects(); + } + std::optional<bool> hasRedZone() const { return HasRedZone; } void setHasRedZone(bool s) { HasRedZone = s; } @@ -481,7 +490,14 @@ public: StackHazardCSRSlotIndex = Index; } - bool hasSplitSVEObjects() const { return false; } + bool hasSplitSVEObjects() const { return SplitSVEObjects; } + void setSplitSVEObjects(bool s) { SplitSVEObjects = s; } + + bool hasSVE_AAPCS(const MachineFunction &MF) const { + return hasSplitSVEObjects() || isSVECC() || + MF.getFunction().getCallingConv() == + CallingConv::AArch64_SVE_VectorCall; + } SMEAttrs getSMEFnAttrs() const { return SMEFnAttrs; } diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp index fec350b..aed137c 100644 --- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp +++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp @@ -723,38 +723,73 @@ void AArch64PrologueEmitter::emitPrologue() { StackOffset PPRLocalsSize = AFL.getPPRStackSize(MF) - PPRCalleeSavesSize; StackOffset ZPRLocalsSize = AFL.getZPRStackSize(MF) - ZPRCalleeSavesSize; + std::optional<MachineBasicBlock::iterator> ZPRCalleeSavesBegin, + ZPRCalleeSavesEnd, PPRCalleeSavesBegin, PPRCalleeSavesEnd; + StackOffset CFAOffset = StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes); MachineBasicBlock::iterator AfterSVESavesI = AfterGPRSavesI; if (!FPAfterSVECalleeSaves) { // Process the SVE callee-saves to find the starts/ends of the ZPR and PPR // areas. + PPRCalleeSavesBegin = AfterGPRSavesI; if (PPRCalleeSavesSize) { LLVM_DEBUG(dbgs() << "PPRCalleeSavedStackSize = " << PPRCalleeSavesSize.getScalable() << "\n"); - assert(isPartOfPPRCalleeSaves(AfterSVESavesI) && + assert(isPartOfPPRCalleeSaves(*PPRCalleeSavesBegin) && "Unexpected instruction"); while (isPartOfPPRCalleeSaves(AfterSVESavesI) && AfterSVESavesI != MBB.getFirstTerminator()) ++AfterSVESavesI; } + PPRCalleeSavesEnd = ZPRCalleeSavesBegin = AfterSVESavesI; if (ZPRCalleeSavesSize) { LLVM_DEBUG(dbgs() << "ZPRCalleeSavedStackSize = " << ZPRCalleeSavesSize.getScalable() << "\n"); - assert(isPartOfZPRCalleeSaves(AfterSVESavesI) && + assert(isPartOfZPRCalleeSaves(*ZPRCalleeSavesBegin) && "Unexpected instruction"); while (isPartOfZPRCalleeSaves(AfterSVESavesI) && AfterSVESavesI != MBB.getFirstTerminator()) ++AfterSVESavesI; } + ZPRCalleeSavesEnd = AfterSVESavesI; } if (EmitAsyncCFI) emitCalleeSavedSVELocations(AfterSVESavesI); if (AFI->hasSplitSVEObjects()) { - reportFatalInternalError("not implemented yet"); + assert(!FPAfterSVECalleeSaves && + "Cannot use FPAfterSVECalleeSaves with aarch64-split-sve-objects"); + assert(!AFL.canUseRedZone(MF) && + "Cannot use redzone with aarch64-split-sve-objects"); + // TODO: Handle HasWinCFI/NeedsWinCFI? + assert(!NeedsWinCFI && + "WinCFI with aarch64-split-sve-objects is not supported"); + + // Split ZPR and PPR allocation. 
+ // Allocate PPR callee saves + allocateStackSpace(*PPRCalleeSavesBegin, 0, PPRCalleeSavesSize, + EmitAsyncCFI && !HasFP, CFAOffset, + MFI.hasVarSizedObjects() || ZPRCalleeSavesSize || + ZPRLocalsSize || PPRLocalsSize); + CFAOffset += PPRCalleeSavesSize; + + // Allocate PPR locals + ZPR callee saves + assert(PPRCalleeSavesEnd == ZPRCalleeSavesBegin && + "Expected ZPR callee saves after PPR locals"); + allocateStackSpace(*PPRCalleeSavesEnd, RealignmentPadding, + PPRLocalsSize + ZPRCalleeSavesSize, + EmitAsyncCFI && !HasFP, CFAOffset, + MFI.hasVarSizedObjects() || ZPRLocalsSize); + CFAOffset += PPRLocalsSize + ZPRCalleeSavesSize; + + // Allocate ZPR locals + allocateStackSpace(*ZPRCalleeSavesEnd, RealignmentPadding, + ZPRLocalsSize + StackOffset::getFixed(NumBytes), + EmitAsyncCFI && !HasFP, CFAOffset, + MFI.hasVarSizedObjects()); } else { // Allocate space for the callee saves (if any). StackOffset LocalsSize = @@ -1195,7 +1230,7 @@ void AArch64PrologueEmitter::emitCalleeSavedGPRLocations( CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup); for (const auto &Info : CSI) { unsigned FrameIdx = Info.getFrameIdx(); - if (MFI.isScalableStackID(FrameIdx)) + if (MFI.hasScalableStackID(FrameIdx)) continue; assert(!Info.isSpilledToReg() && "Spilling to registers not implemented"); @@ -1221,8 +1256,10 @@ void AArch64PrologueEmitter::emitCalleeSavedSVELocations( AFL.getOffsetOfLocalArea(); } + StackOffset PPRStackSize = AFL.getPPRStackSize(MF); for (const auto &Info : CSI) { - if (!MFI.isScalableStackID(Info.getFrameIdx())) + int FI = Info.getFrameIdx(); + if (!MFI.hasScalableStackID(FI)) continue; // Not all unwinders may know about SVE registers, so assume the lowest @@ -1233,9 +1270,13 @@ void AArch64PrologueEmitter::emitCalleeSavedSVELocations( continue; StackOffset Offset = - StackOffset::getScalable(MFI.getObjectOffset(Info.getFrameIdx())) - + StackOffset::getScalable(MFI.getObjectOffset(FI)) - StackOffset::getFixed(AFI->getCalleeSavedStackSize(MFI)); + if (AFI->hasSplitSVEObjects() && + MFI.getStackID(FI) == TargetStackID::ScalableVector) + Offset -= PPRStackSize; + CFIBuilder.insertCFIInst( createCFAOffset(RegInfo, Reg, Offset, IncomingVGOffsetFromDefCFA)); } @@ -1512,7 +1553,75 @@ void AArch64EpilogueEmitter::emitEpilogue() { emitCalleeSavedSVERestores(RestoreEnd); } } else if (AFI->hasSplitSVEObjects() && SVEStackSize) { - reportFatalInternalError("not implemented yet"); + // TODO: Support stack realigment and variable-sized objects. + assert(!AFI->isStackRealigned() && !MFI.hasVarSizedObjects() && + "unexpected stack realignment or variable sized objects with split " + "SVE stack objects"); + // SplitSVEObjects. Determine the sizes and starts/ends of the ZPR and PPR + // areas. 
+ auto ZPRCalleeSavedSize = + StackOffset::getScalable(AFI->getZPRCalleeSavedStackSize()); + auto PPRCalleeSavedSize = + StackOffset::getScalable(AFI->getPPRCalleeSavedStackSize()); + StackOffset PPRLocalsSize = PPRStackSize - PPRCalleeSavedSize; + StackOffset ZPRLocalsSize = ZPRStackSize - ZPRCalleeSavedSize; + + MachineBasicBlock::iterator PPRRestoreBegin = FirstGPRRestoreI, + PPRRestoreEnd = FirstGPRRestoreI; + if (PPRCalleeSavedSize) { + PPRRestoreBegin = std::prev(PPRRestoreEnd); + while (PPRRestoreBegin != MBB.begin() && + isPartOfPPRCalleeSaves(std::prev(PPRRestoreBegin))) + --PPRRestoreBegin; + } + + MachineBasicBlock::iterator ZPRRestoreBegin = PPRRestoreBegin, + ZPRRestoreEnd = PPRRestoreBegin; + if (ZPRCalleeSavedSize) { + ZPRRestoreBegin = std::prev(ZPRRestoreEnd); + while (ZPRRestoreBegin != MBB.begin() && + isPartOfZPRCalleeSaves(std::prev(ZPRRestoreBegin))) + --ZPRRestoreBegin; + } + + auto CFAOffset = + SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize); + if (PPRCalleeSavedSize || ZPRCalleeSavedSize) { + // Deallocate the non-SVE locals first before we can deallocate (and + // restore callee saves) from the SVE area. + auto NonSVELocals = StackOffset::getFixed(NumBytes); + emitFrameOffset(MBB, ZPRRestoreBegin, DL, AArch64::SP, AArch64::SP, + NonSVELocals, TII, MachineInstr::FrameDestroy, false, + false, nullptr, EmitCFI && !HasFP, CFAOffset); + NumBytes = 0; + CFAOffset -= NonSVELocals; + } + + if (ZPRLocalsSize) { + emitFrameOffset(MBB, ZPRRestoreBegin, DL, AArch64::SP, AArch64::SP, + ZPRLocalsSize, TII, MachineInstr::FrameDestroy, false, + false, nullptr, EmitCFI && !HasFP, CFAOffset); + CFAOffset -= ZPRLocalsSize; + } + + if (PPRLocalsSize || ZPRCalleeSavedSize) { + assert(PPRRestoreBegin == ZPRRestoreEnd && + "Expected PPR restores after ZPR"); + emitFrameOffset(MBB, PPRRestoreBegin, DL, AArch64::SP, AArch64::SP, + PPRLocalsSize + ZPRCalleeSavedSize, TII, + MachineInstr::FrameDestroy, false, false, nullptr, + EmitCFI && !HasFP, CFAOffset); + CFAOffset -= PPRLocalsSize + ZPRCalleeSavedSize; + } + if (PPRCalleeSavedSize) { + emitFrameOffset(MBB, PPRRestoreEnd, DL, AArch64::SP, AArch64::SP, + PPRCalleeSavedSize, TII, MachineInstr::FrameDestroy, + false, false, nullptr, EmitCFI && !HasFP, CFAOffset); + } + + // We only emit CFI information for ZPRs so emit CFI after the ZPR restores. 
+ if (EmitCFI) + emitCalleeSavedSVERestores(ZPRRestoreEnd); } if (!HasFP) { @@ -1670,7 +1779,7 @@ void AArch64EpilogueEmitter::emitCalleeSavedRestores( CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameDestroy); for (const auto &Info : CSI) { - if (SVE != MFI.isScalableStackID(Info.getFrameIdx())) + if (SVE != MFI.hasScalableStackID(Info.getFrameIdx())) continue; MCRegister Reg = Info.getReg(); diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index 3f43b70..79975b0 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -71,6 +71,7 @@ bool AArch64RegisterInfo::regNeedsCFI(MCRegister Reg, const MCPhysReg * AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { assert(MF && "Invalid MachineFunction pointer."); + auto &AFI = *MF->getInfo<AArch64FunctionInfo>(); if (MF->getFunction().getCallingConv() == CallingConv::GHC) // GHC set of callee saved regs is empty as all those regs are @@ -101,10 +102,7 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return CSR_Win_AArch64_AAPCS_SwiftTail_SaveList; if (MF->getFunction().getCallingConv() == CallingConv::AArch64_VectorCall) return CSR_Win_AArch64_AAVPCS_SaveList; - if (MF->getFunction().getCallingConv() == - CallingConv::AArch64_SVE_VectorCall) - return CSR_Win_AArch64_SVE_AAPCS_SaveList; - if (MF->getInfo<AArch64FunctionInfo>()->isSVECC()) + if (AFI.hasSVE_AAPCS(*MF)) return CSR_Win_AArch64_SVE_AAPCS_SaveList; return CSR_Win_AArch64_AAPCS_SaveList; } @@ -148,7 +146,7 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { // This is for OSes other than Windows; Windows is a separate case further // above. return CSR_AArch64_AAPCS_X18_SaveList; - if (MF->getInfo<AArch64FunctionInfo>()->isSVECC()) + if (AFI.hasSVE_AAPCS(*MF)) return CSR_AArch64_SVE_AAPCS_SaveList; return CSR_AArch64_AAPCS_SaveList; } @@ -158,6 +156,7 @@ AArch64RegisterInfo::getDarwinCalleeSavedRegs(const MachineFunction *MF) const { assert(MF && "Invalid MachineFunction pointer."); assert(MF->getSubtarget<AArch64Subtarget>().isTargetDarwin() && "Invalid subtarget for getDarwinCalleeSavedRegs"); + auto &AFI = *MF->getInfo<AArch64FunctionInfo>(); if (MF->getFunction().getCallingConv() == CallingConv::CFGuard_Check) report_fatal_error( @@ -205,7 +204,7 @@ AArch64RegisterInfo::getDarwinCalleeSavedRegs(const MachineFunction *MF) const { return CSR_Darwin_AArch64_RT_AllRegs_SaveList; if (MF->getFunction().getCallingConv() == CallingConv::Win64) return CSR_Darwin_AArch64_AAPCS_Win64_SaveList; - if (MF->getInfo<AArch64FunctionInfo>()->isSVECC()) + if (AFI.hasSVE_AAPCS(*MF)) return CSR_Darwin_AArch64_SVE_AAPCS_SaveList; return CSR_Darwin_AArch64_AAPCS_SaveList; } |