Diffstat (limited to 'llvm/lib/Target/AArch64')
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp      3
-rw-r--r--  llvm/lib/Target/AArch64/AArch64FrameLowering.cpp        492
-rw-r--r--  llvm/lib/Target/AArch64/AArch64FrameLowering.h           30
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp          16
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp          62
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrFormats.td           22
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrInfo.cpp             15
-rw-r--r--  llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp   20
-rw-r--r--  llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h     83
-rw-r--r--  llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp     444
-rw-r--r--  llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp          15
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp    2
12 files changed, 821 insertions, 383 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 79655e1..0f4bbfc3 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -1610,7 +1610,8 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
int BaseOffset = -AFI->getTaggedBasePointerOffset();
Register FrameReg;
StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
- MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
+ MF, BaseOffset, false /*isFixed*/, TargetStackID::Default /*StackID*/,
+ FrameReg,
/*PreferFP=*/false,
/*ForSimm=*/true);
Register SrcReg = FrameReg;
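Throughout the patch, resolveFrameOffsetReference's `bool isSVE` parameter becomes a TargetStackID::Value, so each caller names the stack area it is addressing. A minimal sketch of a call for a hypothetical scalable-predicate object (the offset value is made up):

    Register FrameReg;
    StackOffset Off = TFI->resolveFrameOffsetReference(
        MF, /*ObjectOffset=*/-2, /*isFixed=*/false,
        TargetStackID::ScalablePredicateVector, FrameReg,
        /*PreferFP=*/false, /*ForSimm=*/true);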
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index ab5c6f3..8d6eb91 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -56,15 +56,20 @@
// | async context if needed |
// | (a.k.a. "frame record") |
// |-----------------------------------| <- fp(=x29)
-// | <hazard padding> |
-// |-----------------------------------|
-// | |
-// | callee-saved fp/simd/SVE regs |
-// | |
-// |-----------------------------------|
-// | |
-// | SVE stack objects |
-// | |
+//        Default SVE stack layout                 Split SVE objects
+//  (aarch64-split-sve-objects=false)      (aarch64-split-sve-objects=true)
+// |-----------------------------------| |-----------------------------------|
+// |         <hazard padding>          | |    callee-saved PPR registers     |
+// |-----------------------------------| |-----------------------------------|
+// |                                   | |         PPR stack objects         |
+// |   callee-saved fp/simd/SVE regs   | |-----------------------------------|
+// |                                   | |         <hazard padding>          |
+// |-----------------------------------| |-----------------------------------|
+// |                                   | |   callee-saved ZPR/FPR registers  |
+// |         SVE stack objects         | |-----------------------------------|
+// |                                   | |         ZPR stack objects         |
+// |-----------------------------------| |-----------------------------------|
+//                                       ^ NB: FPR CSRs are promoted to ZPRs
// |-----------------------------------|
// |.empty.space.to.make.part.below....|
// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
@@ -274,6 +279,11 @@ static cl::opt<bool> OrderFrameObjects("aarch64-order-frame-objects",
cl::desc("sort stack allocations"),
cl::init(true), cl::Hidden);
+static cl::opt<bool>
+ SplitSVEObjects("aarch64-split-sve-objects",
+ cl::desc("Split allocation of ZPR & PPR objects"),
+ cl::init(false), cl::Hidden);
+
cl::opt<bool> EnableHomogeneousPrologEpilog(
"homogeneous-prolog-epilog", cl::Hidden,
cl::desc("Emit homogeneous prologue and epilogue for the size "
@@ -324,7 +334,41 @@ AArch64FrameLowering::getArgumentStackToRestore(MachineFunction &MF,
static bool produceCompactUnwindFrame(const AArch64FrameLowering &,
MachineFunction &MF);
-// Conservatively, returns true if the function is likely to have an SVE vectors
+enum class AssignObjectOffsets { No, Yes };
+/// Process all the SVE stack objects and determine the SVE stack size and
+/// the offset of each object. If AssignOffsets is "Yes", the offsets get
+/// assigned (and the SVE stack sizes set). Returns the sizes of the ZPR and
+/// PPR stack areas.
+static SVEStackSizes determineSVEStackSizes(MachineFunction &MF,
+ AssignObjectOffsets AssignOffsets);
+
+static unsigned getStackHazardSize(const MachineFunction &MF) {
+ return MF.getSubtarget<AArch64Subtarget>().getStreamingHazardSize();
+}
+
+/// Returns true if PPRs are spilled as ZPRs.
+static bool arePPRsSpilledAsZPR(const MachineFunction &MF) {
+ return MF.getSubtarget().getRegisterInfo()->getSpillSize(
+ AArch64::PPRRegClass) == 16;
+}
+
+StackOffset
+AArch64FrameLowering::getZPRStackSize(const MachineFunction &MF) const {
+ const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ return StackOffset::getScalable(AFI->getStackSizeZPR());
+}
+
+StackOffset
+AArch64FrameLowering::getPPRStackSize(const MachineFunction &MF) const {
+ // With split SVE objects, the hazard padding is added to the PPR region,
+ // which places it between the [GPR, PPR] area and the [ZPR, FPR] area. This
+ // avoids hazards between accesses to the GPR/PPR area and accesses to the
+ // ZPR/FPR area.
+ const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ return StackOffset::get(AFI->hasSplitSVEObjects() ? getStackHazardSize(MF)
+ : 0,
+ AFI->getStackSizePPR());
+}
+
+// Conservatively, returns true if the function is likely to have SVE vectors
// on the stack. This function is safe to be called before callee-saves or
// object offsets have been determined.
static bool isLikelyToHaveSVEStack(const AArch64FrameLowering &AFL,
@@ -338,7 +382,7 @@ static bool isLikelyToHaveSVEStack(const AArch64FrameLowering &AFL,
const MachineFrameInfo &MFI = MF.getFrameInfo();
for (int FI = MFI.getObjectIndexBegin(); FI < MFI.getObjectIndexEnd(); FI++) {
- if (MFI.getStackID(FI) == TargetStackID::ScalableVector)
+ if (MFI.hasScalableStackID(FI))
return true;
}
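With split SVE objects, getPPRStackSize folds the fixed-size hazard padding into the PPR area, so the two getters compose into the full SVE frame size. A small sketch with made-up sizes (32 scalable bytes of ZPRs, 16 scalable bytes of PPRs, 1024 bytes of hazard padding):

    StackOffset ZPR = StackOffset::getScalable(32);      // getZPRStackSize(MF)
    StackOffset PPR = StackOffset::get(/*Fixed=*/1024,   // hazard padding
                                       /*Scalable=*/16); // getPPRStackSize(MF)
    StackOffset SVE = ZPR + PPR; // {Fixed: 1024, Scalable: 48}, i.e. the
                                 // value getSVEStackSize(MF) now returns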
@@ -482,13 +526,6 @@ AArch64FrameLowering::getFixedObjectSize(const MachineFunction &MF,
}
}
-/// Returns the size of the entire SVE stackframe (calleesaves + spills).
-StackOffset
-AArch64FrameLowering::getSVEStackSize(const MachineFunction &MF) const {
- const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- return StackOffset::getScalable((int64_t)AFI->getStackSizeSVE());
-}
-
bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
if (!EnableRedZone)
return false;
@@ -514,7 +551,7 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
!Subtarget.hasSVE();
return !(MFI.hasCalls() || hasFP(MF) || NumBytes > RedZoneSize ||
- getSVEStackSize(MF) || LowerQRegCopyThroughMem);
+ AFI->hasSVEStackSize() || LowerQRegCopyThroughMem);
}
/// hasFPImpl - Return true if the specified function should have a dedicated
@@ -557,7 +594,7 @@ bool AArch64FrameLowering::hasFPImpl(const MachineFunction &MF) const {
// CFA in either of these cases.
if (AFI.needsDwarfUnwindInfo(MF) &&
((requiresSaveVG(MF) || AFI.getSMEFnAttrs().hasStreamingBody()) &&
- (!AFI.hasCalculatedStackSizeSVE() || AFI.getStackSizeSVE() > 0)))
+ (!AFI.hasCalculatedStackSizeSVE() || AFI.hasSVEStackSize())))
return true;
// With large callframes around we may need to use FP to access the scavenging
// emergency spillslot.
@@ -1126,10 +1163,6 @@ static bool isTargetWindows(const MachineFunction &MF) {
return MF.getSubtarget<AArch64Subtarget>().isTargetWindows();
}
-static unsigned getStackHazardSize(const MachineFunction &MF) {
- return MF.getSubtarget<AArch64Subtarget>().getStreamingHazardSize();
-}
-
void AArch64FrameLowering::emitPacRetPlusLeafHardening(
MachineFunction &MF) const {
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
@@ -1212,7 +1245,9 @@ AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
const auto &MFI = MF.getFrameInfo();
int64_t ObjectOffset = MFI.getObjectOffset(FI);
- StackOffset SVEStackSize = getSVEStackSize(MF);
+ StackOffset ZPRStackSize = getZPRStackSize(MF);
+ StackOffset PPRStackSize = getPPRStackSize(MF);
+ StackOffset SVEStackSize = ZPRStackSize + PPRStackSize;
// For VLA-area objects, just emit an offset at the end of the stack frame.
// Whilst not quite correct, these objects do live at the end of the frame and
@@ -1228,11 +1263,21 @@ AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
bool FPAfterSVECalleeSaves =
isTargetWindows(MF) && AFI->getSVECalleeSavedStackSize();
- if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
+ if (MFI.hasScalableStackID(FI)) {
if (FPAfterSVECalleeSaves &&
- -ObjectOffset <= (int64_t)AFI->getSVECalleeSavedStackSize())
+ -ObjectOffset <= (int64_t)AFI->getSVECalleeSavedStackSize()) {
+ assert(!AFI->hasSplitSVEObjects() &&
+ "split-sve-objects not supported with FPAfterSVECalleeSaves");
return StackOffset::getScalable(ObjectOffset);
- return StackOffset::get(-((int64_t)AFI->getCalleeSavedStackSize()),
+ }
+ StackOffset AccessOffset{};
+ // The scalable vectors are below (lower address) the scalable predicates
+ // with split SVE objects, so we must subtract the size of the predicates.
+ if (AFI->hasSplitSVEObjects() &&
+ MFI.getStackID(FI) == TargetStackID::ScalableVector)
+ AccessOffset = -PPRStackSize;
+ return AccessOffset +
+ StackOffset::get(-((int64_t)AFI->getCalleeSavedStackSize()),
ObjectOffset);
}
@@ -1294,14 +1339,15 @@ StackOffset AArch64FrameLowering::resolveFrameIndexReference(
const auto &MFI = MF.getFrameInfo();
int64_t ObjectOffset = MFI.getObjectOffset(FI);
bool isFixed = MFI.isFixedObjectIndex(FI);
- bool isSVE = MFI.getStackID(FI) == TargetStackID::ScalableVector;
- return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, isSVE, FrameReg,
- PreferFP, ForSimm);
+ auto StackID = static_cast<TargetStackID::Value>(MFI.getStackID(FI));
+ return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, StackID,
+ FrameReg, PreferFP, ForSimm);
}
StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
- const MachineFunction &MF, int64_t ObjectOffset, bool isFixed, bool isSVE,
- Register &FrameReg, bool PreferFP, bool ForSimm) const {
+ const MachineFunction &MF, int64_t ObjectOffset, bool isFixed,
+ TargetStackID::Value StackID, Register &FrameReg, bool PreferFP,
+ bool ForSimm) const {
const auto &MFI = MF.getFrameInfo();
const auto *RegInfo = static_cast<const AArch64RegisterInfo *>(
MF.getSubtarget().getRegisterInfo());
@@ -1312,8 +1358,11 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
int64_t Offset = getStackOffset(MF, ObjectOffset).getFixed();
bool isCSR =
!isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize(MFI));
+ bool isSVE = MFI.isScalableStackID(StackID);
- const StackOffset &SVEStackSize = getSVEStackSize(MF);
+ StackOffset ZPRStackSize = getZPRStackSize(MF);
+ StackOffset PPRStackSize = getPPRStackSize(MF);
+ StackOffset SVEStackSize = ZPRStackSize + PPRStackSize;
// Use frame pointer to reference fixed objects. Use it for locals if
// there are VLAs or a dynamically realigned SP (and thus the SP isn't
@@ -1388,12 +1437,25 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
isTargetWindows(MF) && AFI->getSVECalleeSavedStackSize();
if (isSVE) {
- StackOffset FPOffset =
- StackOffset::get(-AFI->getCalleeSaveBaseToFrameRecordOffset(), ObjectOffset);
+ StackOffset FPOffset = StackOffset::get(
+ -AFI->getCalleeSaveBaseToFrameRecordOffset(), ObjectOffset);
StackOffset SPOffset =
SVEStackSize +
StackOffset::get(MFI.getStackSize() - AFI->getCalleeSavedStackSize(),
ObjectOffset);
+
+ // With split SVE objects the ObjectOffset is relative to the split area
+ // (i.e. the PPR area or ZPR area respectively).
+ if (AFI->hasSplitSVEObjects() && StackID == TargetStackID::ScalableVector) {
+ // If we're accessing an SVE vector with split SVE objects...
+ // - From the FP we need to move down past the PPR area:
+ FPOffset -= PPRStackSize;
+ // - From the SP we only need to move up to the ZPR area:
+ SPOffset -= PPRStackSize;
+ // Note: `SPOffset = SVEStackSize + ...`, so `-= PPRStackSize` results in
+ // `SPOffset = ZPRStackSize + ...`.
+ }
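Plugging made-up sizes into the adjustment above makes it concrete. Assume PPRStackSize = {Fixed: 1024, Scalable: 16} (hazard padding plus PPR objects), a total SVEStackSize = {1024, 48}, and a ZPR object at ObjectOffset -16 (fixed-object terms omitted):

    StackOffset FPOffset = StackOffset::get(0, -16);
    StackOffset SPOffset = StackOffset::get(1024, 48) +  // SVEStackSize
                           StackOffset::get(0, -16);     // = {1024, 32}
    FPOffset -= StackOffset::get(1024, 16); // past the PPR area: {-1024, -32}
    SPOffset -= StackOffset::get(1024, 16); // up to the ZPR area: {0, 16}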
+
if (FPAfterSVECalleeSaves) {
FPOffset += StackOffset::getScalable(AFI->getSVECalleeSavedStackSize());
if (-ObjectOffset <= (int64_t)AFI->getSVECalleeSavedStackSize()) {
@@ -1401,6 +1463,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
SPOffset += StackOffset::getFixed(AFI->getCalleeSavedStackSize());
}
}
+
// Always use the FP for SVE spills if available and beneficial.
if (hasFP(MF) && (SPOffset.getFixed() ||
FPOffset.getScalable() < SPOffset.getScalable() ||
@@ -1408,13 +1471,13 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
FrameReg = RegInfo->getFrameRegister(MF);
return FPOffset;
}
-
FrameReg = RegInfo->hasBasePointer(MF) ? RegInfo->getBaseRegister()
: (unsigned)AArch64::SP;
+
return SPOffset;
}
- StackOffset ScalableOffset = {};
+ StackOffset SVEAreaOffset = {};
if (FPAfterSVECalleeSaves) {
// In this stack layout, the FP is in between the callee saves and other
// SVE allocations.
@@ -1422,25 +1485,25 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
StackOffset::getScalable(AFI->getSVECalleeSavedStackSize());
if (UseFP) {
if (isFixed)
- ScalableOffset = SVECalleeSavedStack;
+ SVEAreaOffset = SVECalleeSavedStack;
else if (!isCSR)
- ScalableOffset = SVECalleeSavedStack - SVEStackSize;
+ SVEAreaOffset = SVECalleeSavedStack - SVEStackSize;
} else {
if (isFixed)
- ScalableOffset = SVEStackSize;
+ SVEAreaOffset = SVEStackSize;
else if (isCSR)
- ScalableOffset = SVEStackSize - SVECalleeSavedStack;
+ SVEAreaOffset = SVEStackSize - SVECalleeSavedStack;
}
} else {
if (UseFP && !(isFixed || isCSR))
- ScalableOffset = -SVEStackSize;
+ SVEAreaOffset = -SVEStackSize;
if (!UseFP && (isFixed || isCSR))
- ScalableOffset = SVEStackSize;
+ SVEAreaOffset = SVEStackSize;
}
if (UseFP) {
FrameReg = RegInfo->getFrameRegister(MF);
- return StackOffset::getFixed(FPOffset) + ScalableOffset;
+ return StackOffset::getFixed(FPOffset) + SVEAreaOffset;
}
// Use the base pointer if we have one.
@@ -1457,7 +1520,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
Offset -= AFI->getLocalStackSize();
}
- return StackOffset::getFixed(Offset) + ScalableOffset;
+ return StackOffset::getFixed(Offset) + SVEAreaOffset;
}
static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
@@ -1614,11 +1677,25 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
RegInc = -1;
FirstReg = Count - 1;
}
+
bool FPAfterSVECalleeSaves = IsWindows && AFI->getSVECalleeSavedStackSize();
- int ScalableByteOffset =
- FPAfterSVECalleeSaves ? 0 : AFI->getSVECalleeSavedStackSize();
+
+ int ZPRByteOffset = 0;
+ int PPRByteOffset = 0;
+ bool SplitPPRs = AFI->hasSplitSVEObjects();
+ if (SplitPPRs) {
+ ZPRByteOffset = AFI->getZPRCalleeSavedStackSize();
+ PPRByteOffset = AFI->getPPRCalleeSavedStackSize();
+ } else if (!FPAfterSVECalleeSaves) {
+ ZPRByteOffset =
+ AFI->getZPRCalleeSavedStackSize() + AFI->getPPRCalleeSavedStackSize();
+ // Unused: Everything goes in ZPR space.
+ PPRByteOffset = 0;
+ }
+
bool NeedGapToAlignStack = AFI->hasCalleeSaveStackFreeSpace();
Register LastReg = 0;
+ bool HasCSHazardPadding = AFI->hasStackHazardSlotIndex() && !SplitPPRs;
// When iterating backwards, the loop condition relies on unsigned wraparound.
for (unsigned i = FirstReg; i < Count; i += RegInc) {
@@ -1647,8 +1724,12 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
llvm_unreachable("Unsupported register class.");
}
+ int &ScalableByteOffset = RPI.Type == RegPairInfo::PPR && SplitPPRs
+ ? PPRByteOffset
+ : ZPRByteOffset;
+
// Add the stack hazard size as we transition from GPR->FPR CSRs.
- if (AFI->hasStackHazardSlotIndex() &&
+ if (HasCSHazardPadding &&
(!LastReg || !AArch64InstrInfo::isFpOrNEON(LastReg)) &&
AArch64InstrInfo::isFpOrNEON(RPI.Reg1))
ByteOffset += StackFillDir * StackHazardSize;
@@ -1656,7 +1737,7 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
int Scale = TRI->getSpillSize(*RPI.RC);
// Add the next reg to the pair if it is in the same register class.
- if (unsigned(i + RegInc) < Count && !AFI->hasStackHazardSlotIndex()) {
+ if (unsigned(i + RegInc) < Count && !HasCSHazardPadding) {
MCRegister NextReg = CSI[i + RegInc].getReg();
bool IsFirst = i == FirstReg;
switch (RPI.Type) {
@@ -2021,10 +2102,14 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
}
// Update the StackIDs of the SVE stack slots.
MachineFrameInfo &MFI = MF.getFrameInfo();
- if (RPI.Type == RegPairInfo::ZPR || RPI.Type == RegPairInfo::PPR) {
+ if (RPI.Type == RegPairInfo::ZPR) {
MFI.setStackID(FrameIdxReg1, TargetStackID::ScalableVector);
if (RPI.isPaired())
MFI.setStackID(FrameIdxReg2, TargetStackID::ScalableVector);
+ } else if (RPI.Type == RegPairInfo::PPR) {
+ MFI.setStackID(FrameIdxReg1, TargetStackID::ScalablePredicateVector);
+ if (RPI.isPaired())
+ MFI.setStackID(FrameIdxReg2, TargetStackID::ScalablePredicateVector);
}
}
return true;
@@ -2199,6 +2284,13 @@ static std::optional<int> getLdStFrameID(const MachineInstr &MI,
return getMMOFrameID(*MI.memoperands_begin(), MFI);
}
+// Returns true if the load/store MachineInstr \p MI is a PPR access.
+static bool isPPRAccess(const MachineInstr &MI) {
+ return MI.getOpcode() != AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO &&
+ MI.getOpcode() != AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO &&
+ AArch64::PPRRegClass.contains(MI.getOperand(0).getReg());
+}
+
// Check if a Hazard slot is needed for the current function, and if so create
// one for it. The index is stored in AArch64FunctionInfo->StackHazardSlotIndex,
// which can be used to determine if any hazard padding is needed.
@@ -2222,26 +2314,50 @@ void AArch64FrameLowering::determineStackHazardSlot(
bool HasFPRCSRs = any_of(SavedRegs.set_bits(), [](unsigned Reg) {
return AArch64::FPR64RegClass.contains(Reg) ||
AArch64::FPR128RegClass.contains(Reg) ||
- AArch64::ZPRRegClass.contains(Reg) ||
- AArch64::PPRRegClass.contains(Reg);
+ AArch64::ZPRRegClass.contains(Reg);
+ });
+ bool HasPPRCSRs = any_of(SavedRegs.set_bits(), [](unsigned Reg) {
+ return AArch64::PPRRegClass.contains(Reg);
});
bool HasFPRStackObjects = false;
- if (!HasFPRCSRs) {
- std::vector<unsigned> FrameObjects(MFI.getObjectIndexEnd());
+ bool HasPPRStackObjects = false;
+ if (!HasFPRCSRs || SplitSVEObjects) {
+ enum SlotType : uint8_t {
+ Unknown = 0,
+ ZPRorFPR = 1 << 0,
+ PPR = 1 << 1,
+ GPR = 1 << 2,
+ LLVM_MARK_AS_BITMASK_ENUM(GPR)
+ };
+
+ // Find stack slots solely used for one kind of register (ZPR, PPR, etc.),
+ // based on the kinds of accesses used in the function.
+ SmallVector<SlotType> SlotTypes(MFI.getObjectIndexEnd(), SlotType::Unknown);
for (auto &MBB : MF) {
for (auto &MI : MBB) {
std::optional<int> FI = getLdStFrameID(MI, MFI);
- if (FI && *FI >= 0 && *FI < (int)FrameObjects.size()) {
- if (MFI.getStackID(*FI) == TargetStackID::ScalableVector ||
- AArch64InstrInfo::isFpOrNEON(MI))
- FrameObjects[*FI] |= 2;
- else
- FrameObjects[*FI] |= 1;
+ if (!FI || *FI < 0 || *FI >= int(SlotTypes.size()))
+ continue;
+ if (MFI.hasScalableStackID(*FI)) {
+ SlotTypes[*FI] |=
+ isPPRAccess(MI) ? SlotType::PPR : SlotType::ZPRorFPR;
+ } else {
+ SlotTypes[*FI] |= AArch64InstrInfo::isFpOrNEON(MI)
+ ? SlotType::ZPRorFPR
+ : SlotType::GPR;
}
}
}
- HasFPRStackObjects =
- any_of(FrameObjects, [](unsigned B) { return (B & 3) == 2; });
+
+ for (int FI = 0; FI < int(SlotTypes.size()); ++FI) {
+ HasFPRStackObjects |= SlotTypes[FI] == SlotType::ZPRorFPR;
+ // For SplitSVEObjects, remember that this stack slot is a predicate; this
+ // is needed later when determining the frame layout.
+ if (SlotTypes[FI] == SlotType::PPR) {
+ MFI.setStackID(FI, TargetStackID::ScalablePredicateVector);
+ HasPPRStackObjects = true;
+ }
+ }
}
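The classification is a per-slot union of the access kinds seen; a slot counts as purely one kind only if exactly one bit ends up set. A standalone sketch of the same scheme, independent of MachineFrameInfo:

    enum SlotType : uint8_t { Unknown = 0, ZPRorFPR = 1, PPR = 2, GPR = 4 };
    uint8_t Slot = Unknown;
    Slot |= PPR;                    // a predicate spill/fill was seen
    Slot |= ZPRorFPR;               // ...and later a vector access
    bool PurelyPPR = (Slot == PPR); // false: the slot is mixed, so it is not
                                    // retagged as ScalablePredicateVector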
if (HasFPRCSRs || HasFPRStackObjects) {
@@ -2250,6 +2366,78 @@ void AArch64FrameLowering::determineStackHazardSlot(
<< StackHazardSize << "\n");
AFI->setStackHazardSlotIndex(ID);
}
+
+ // Determine if we should use SplitSVEObjects. This should only be used if
+ // there's a possibility of a stack hazard between PPRs and ZPRs or FPRs.
+ if (SplitSVEObjects) {
+ if (!HasPPRCSRs && !HasPPRStackObjects) {
+ LLVM_DEBUG(
+ dbgs() << "Not using SplitSVEObjects as no PPRs are on the stack\n");
+ return;
+ }
+
+ if (!HasFPRCSRs && !HasFPRStackObjects) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Not using SplitSVEObjects as no FPRs or ZPRs are on the stack\n");
+ return;
+ }
+
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ if (MFI.hasVarSizedObjects() || TRI->hasStackRealignment(MF)) {
+ LLVM_DEBUG(dbgs() << "SplitSVEObjects is not supported with variable "
+ "sized objects or realignment\n");
+ return;
+ }
+
+ if (arePPRsSpilledAsZPR(MF)) {
+ LLVM_DEBUG(dbgs() << "SplitSVEObjects is not supported with "
+ "-aarch64-enable-zpr-predicate-spills\n");
+ return;
+ }
+
+ // If another calling convention is explicitly set, FPRs can't be promoted
+ // to ZPR callee-saves.
+ if (!is_contained({CallingConv::C, CallingConv::Fast,
+ CallingConv::AArch64_SVE_VectorCall},
+ MF.getFunction().getCallingConv())) {
+ LLVM_DEBUG(
+ dbgs() << "Calling convention is not supported with SplitSVEObjects\n");
+ return;
+ }
+
+ [[maybe_unused]] const AArch64Subtarget &Subtarget =
+ MF.getSubtarget<AArch64Subtarget>();
+ assert(Subtarget.isSVEorStreamingSVEAvailable() &&
+ "Expected SVE to be available for PPRs");
+
+ // With SplitSVEObjects the CS hazard padding is placed between the
+ // PPRs and ZPRs. If there were any FPR CSRs, there would be a hazard
+ // between them and the GPR CSRs. Avoid this by promoting all FPR CSRs
+ // to ZPRs.
+ BitVector FPRZRegs(SavedRegs.size());
+ for (size_t Reg = 0, E = SavedRegs.size(); HasFPRCSRs && Reg < E; ++Reg) {
+ BitVector::reference RegBit = SavedRegs[Reg];
+ if (!RegBit)
+ continue;
+ unsigned SubRegIdx = 0;
+ if (AArch64::FPR64RegClass.contains(Reg))
+ SubRegIdx = AArch64::dsub;
+ else if (AArch64::FPR128RegClass.contains(Reg))
+ SubRegIdx = AArch64::zsub;
+ else
+ continue;
+ // Clear the bit for the FPR save.
+ RegBit = false;
+ // Mark that we should save the corresponding ZPR.
+ Register ZReg =
+ TRI->getMatchingSuperReg(Reg, SubRegIdx, &AArch64::ZPRRegClass);
+ FPRZRegs.set(ZReg);
+ }
+ SavedRegs |= FPRZRegs;
+
+ AFI->setSplitSVEObjects(true);
+ LLVM_DEBUG(dbgs() << "SplitSVEObjects enabled!\n");
+ }
}
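The promotion relies on each FPR being a sub-register of the corresponding Z register (d8 is the low 64 bits of z8, q9 the low 128 bits of z9), so the spilled value is preserved. Illustrative queries using the same sub-register indices as above:

    Register Z8 = TRI->getMatchingSuperReg(AArch64::D8, AArch64::dsub,
                                           &AArch64::ZPRRegClass); // yields Z8
    Register Z9 = TRI->getMatchingSuperReg(AArch64::Q9, AArch64::zsub,
                                           &AArch64::ZPRRegClass); // yields Z9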
void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
@@ -2260,10 +2448,11 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
if (MF.getFunction().getCallingConv() == CallingConv::GHC)
return;
+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+
TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
MF.getSubtarget().getRegisterInfo());
- const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
unsigned UnspilledCSGPR = AArch64::NoRegister;
unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
@@ -2382,17 +2571,26 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
SavedRegs.set(AArch64::X18);
}
+ // Determine if a Hazard slot should be used and where it should go.
+ // If SplitSVEObjects is used, the hazard padding is placed between the PPRs
+ // and ZPRs. Otherwise, it goes in the callee save area.
+ determineStackHazardSlot(MF, SavedRegs);
+
// Calculates the callee saved stack size.
unsigned CSStackSize = 0;
- unsigned SVECSStackSize = 0;
+ unsigned ZPRCSStackSize = 0;
+ unsigned PPRCSStackSize = 0;
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
for (unsigned Reg : SavedRegs.set_bits()) {
auto *RC = TRI->getMinimalPhysRegClass(Reg);
assert(RC && "expected register class!");
auto SpillSize = TRI->getSpillSize(*RC);
- if (AArch64::PPRRegClass.contains(Reg) ||
- AArch64::ZPRRegClass.contains(Reg))
- SVECSStackSize += SpillSize;
+ bool IsZPR = AArch64::ZPRRegClass.contains(Reg);
+ bool IsPPR = !IsZPR && AArch64::PPRRegClass.contains(Reg);
+ if (IsZPR || (IsPPR && arePPRsSpilledAsZPR(MF)))
+ ZPRCSStackSize += SpillSize;
+ else if (IsPPR)
+ PPRCSStackSize += SpillSize;
else
CSStackSize += SpillSize;
}
@@ -2402,17 +2600,15 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// only 64-bit GPRs can be added to SavedRegs.
unsigned NumSavedRegs = SavedRegs.count();
+ // If we have hazard padding in the CS area add that to the size.
+ if (AFI->isStackHazardIncludedInCalleeSaveArea())
+ CSStackSize += getStackHazardSize(MF);
+
// Increase the callee-saved stack size if the function has streaming mode
// changes, as we will need to spill the value of the VG register.
if (requiresSaveVG(MF))
CSStackSize += 8;
- // Determine if a Hazard slot should be used, and increase the CSStackSize by
- // StackHazardSize if so.
- determineStackHazardSlot(MF, SavedRegs);
- if (AFI->hasStackHazardSlotIndex())
- CSStackSize += getStackHazardSize(MF);
-
// If we must call __arm_get_current_vg in the prologue preserve the LR.
if (requiresSaveVG(MF) && !Subtarget.hasSVE())
SavedRegs.set(AArch64::LR);
@@ -2433,8 +2629,11 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
});
// If any callee-saved registers are used, the frame cannot be eliminated.
- int64_t SVEStackSize =
- alignTo(SVECSStackSize + estimateSVEStackObjectOffsets(MFI), 16);
+ auto [ZPRLocalStackSize, PPRLocalStackSize] =
+ determineSVEStackSizes(MF, AssignObjectOffsets::No);
+ uint64_t SVELocals = ZPRLocalStackSize + PPRLocalStackSize;
+ uint64_t SVEStackSize =
+ alignTo(ZPRCSStackSize + PPRCSStackSize + SVELocals, 16);
bool CanEliminateFrame = (SavedRegs.count() == 0) && !SVEStackSize;
// The CSR spill slots have not been allocated yet, so estimateStackSize
@@ -2519,7 +2718,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// instructions.
AFI->setCalleeSavedStackSize(AlignedCSStackSize);
AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize);
- AFI->setSVECalleeSavedStackSize(alignTo(SVECSStackSize, 16));
+ AFI->setSVECalleeSavedStackSize(ZPRCSStackSize, alignTo(PPRCSStackSize, 16));
}
bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
@@ -2572,7 +2771,7 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
// Create a hazard slot as we switch between GPR and FPR CSRs.
- if (AFI->hasStackHazardSlotIndex() &&
+ if (AFI->isStackHazardIncludedInCalleeSaveArea() &&
(!LastReg || !AArch64InstrInfo::isFpOrNEON(LastReg)) &&
AArch64InstrInfo::isFpOrNEON(Reg)) {
assert(HazardSlotIndex == std::numeric_limits<int>::max() &&
@@ -2611,7 +2810,7 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
}
// Add hazard slot in the case where no FPR CSRs are present.
- if (AFI->hasStackHazardSlotIndex() &&
+ if (AFI->isStackHazardIncludedInCalleeSaveArea() &&
HazardSlotIndex == std::numeric_limits<int>::max()) {
HazardSlotIndex = MFI.CreateStackObject(StackHazardSize, Align(8), true);
LLVM_DEBUG(dbgs() << "Created CSR Hazard at slot " << HazardSlotIndex
@@ -2658,7 +2857,6 @@ static bool getSVECalleeSaveSlotRange(const MachineFrameInfo &MFI,
assert((Max == std::numeric_limits<int>::min() ||
Max + 1 == CS.getFrameIdx()) &&
"SVE CalleeSaves are not consecutive");
-
Min = std::min(Min, CS.getFrameIdx());
Max = std::max(Max, CS.getFrameIdx());
}
@@ -2666,43 +2864,64 @@ static bool getSVECalleeSaveSlotRange(const MachineFrameInfo &MFI,
return Min != std::numeric_limits<int>::max();
}
-// Process all the SVE stack objects and determine offsets for each
-// object. If AssignOffsets is true, the offsets get assigned.
-// Fills in the first and last callee-saved frame indices into
-// Min/MaxCSFrameIndex, respectively.
-// Returns the size of the stack.
-static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
- int &MinCSFrameIndex,
- int &MaxCSFrameIndex,
- bool AssignOffsets) {
+static SVEStackSizes determineSVEStackSizes(MachineFunction &MF,
+ AssignObjectOffsets AssignOffsets) {
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ auto *AFI = MF.getInfo<AArch64FunctionInfo>();
+
+ SVEStackSizes SVEStack{};
+
+ // With SplitSVEObjects we maintain separate stack offsets for predicates
+ // (PPRs) and SVE vectors (ZPRs). When SplitSVEObjects is disabled,
+ // predicates are included in the SVE vector area.
+ uint64_t &ZPRStackTop = SVEStack.ZPRStackSize;
+ uint64_t &PPRStackTop =
+ AFI->hasSplitSVEObjects() ? SVEStack.PPRStackSize : SVEStack.ZPRStackSize;
+
#ifndef NDEBUG
// First process all fixed stack objects.
for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
- assert(MFI.getStackID(I) != TargetStackID::ScalableVector &&
+ assert(!MFI.hasScalableStackID(I) &&
"SVE vectors should never be passed on the stack by value, only by "
"reference.");
#endif
- auto Assign = [&MFI](int FI, int64_t Offset) {
+ auto AllocateObject = [&](int FI) {
+ uint64_t &StackTop = MFI.getStackID(FI) == TargetStackID::ScalableVector
+ ? ZPRStackTop
+ : PPRStackTop;
+
+ // FIXME: Given that the length of SVE vectors is not necessarily a power of
+ // two, we'd need to align every object dynamically at runtime if the
+ // alignment is larger than 16. This is not yet supported.
+ Align Alignment = MFI.getObjectAlign(FI);
+ if (Alignment > Align(16))
+ report_fatal_error(
+ "Alignment of scalable vectors > 16 bytes is not yet supported");
+
+ StackTop += MFI.getObjectSize(FI);
+ StackTop = alignTo(StackTop, Alignment);
+
+ assert(StackTop < std::numeric_limits<int64_t>::max() &&
+ "SVE StackTop far too large?!");
+
+ int64_t Offset = -int64_t(StackTop);
+ if (AssignOffsets == AssignObjectOffsets::Yes)
+ MFI.setObjectOffset(FI, Offset);
+
LLVM_DEBUG(dbgs() << "alloc FI(" << FI << ") at SP[" << Offset << "]\n");
- MFI.setObjectOffset(FI, Offset);
};
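AllocateObject bump-allocates downwards within whichever area the object's stack ID selects. A compact model of the arithmetic with made-up objects (two 16-byte ZPR locals and one 2-byte PPR local, split objects enabled):

    uint64_t ZPRStackTop = 0, PPRStackTop = 0;
    auto Alloc = [](uint64_t &Top, uint64_t Size, uint64_t Alignment) {
      Top = alignTo(Top + Size, Alignment); // as in AllocateObject
      return -int64_t(Top);                 // the object's scalable offset
    };
    int64_t Z0 = Alloc(ZPRStackTop, 16, 16); // -16
    int64_t Z1 = Alloc(ZPRStackTop, 16, 16); // -32
    int64_t P0 = Alloc(PPRStackTop, 2, 2);   // -2, in the separate PPR area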
- int64_t Offset = 0;
-
// Then process all callee saved slots.
+ int MinCSFrameIndex, MaxCSFrameIndex;
if (getSVECalleeSaveSlotRange(MFI, MinCSFrameIndex, MaxCSFrameIndex)) {
- // Assign offsets to the callee save slots.
- for (int I = MinCSFrameIndex; I <= MaxCSFrameIndex; ++I) {
- Offset += MFI.getObjectSize(I);
- Offset = alignTo(Offset, MFI.getObjectAlign(I));
- if (AssignOffsets)
- Assign(I, -Offset);
- }
+ for (int FI = MinCSFrameIndex; FI <= MaxCSFrameIndex; ++FI)
+ AllocateObject(FI);
}
- // Ensure that the Callee-save area is aligned to 16bytes.
- Offset = alignTo(Offset, Align(16U));
+ // Ensure the CS area is 16-byte aligned.
+ PPRStackTop = alignTo(PPRStackTop, Align(16U));
+ ZPRStackTop = alignTo(ZPRStackTop, Align(16U));
// Create a buffer of SVE objects to allocate and sort it.
SmallVector<int, 8> ObjectsToAllocate;
@@ -2715,48 +2934,31 @@ static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
if (MFI.getStackID(StackProtectorFI) == TargetStackID::ScalableVector)
ObjectsToAllocate.push_back(StackProtectorFI);
}
- for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) {
- unsigned StackID = MFI.getStackID(I);
- if (StackID != TargetStackID::ScalableVector)
- continue;
- if (I == StackProtectorFI)
+
+ for (int FI = 0, E = MFI.getObjectIndexEnd(); FI != E; ++FI) {
+ if (FI == StackProtectorFI || MFI.isDeadObjectIndex(FI))
continue;
- if (MaxCSFrameIndex >= I && I >= MinCSFrameIndex)
+ if (MaxCSFrameIndex >= FI && FI >= MinCSFrameIndex)
continue;
- if (MFI.isDeadObjectIndex(I))
+
+ if (MFI.getStackID(FI) != TargetStackID::ScalableVector &&
+ MFI.getStackID(FI) != TargetStackID::ScalablePredicateVector)
continue;
- ObjectsToAllocate.push_back(I);
+ ObjectsToAllocate.push_back(FI);
}
// Allocate all SVE locals and spills
- for (unsigned FI : ObjectsToAllocate) {
- Align Alignment = MFI.getObjectAlign(FI);
- // FIXME: Given that the length of SVE vectors is not necessarily a power of
- // two, we'd need to align every object dynamically at runtime if the
- // alignment is larger than 16. This is not yet supported.
- if (Alignment > Align(16))
- report_fatal_error(
- "Alignment of scalable vectors > 16 bytes is not yet supported");
-
- Offset = alignTo(Offset + MFI.getObjectSize(FI), Alignment);
- if (AssignOffsets)
- Assign(FI, -Offset);
- }
+ for (unsigned FI : ObjectsToAllocate)
+ AllocateObject(FI);
- return Offset;
-}
+ PPRStackTop = alignTo(PPRStackTop, Align(16U));
+ ZPRStackTop = alignTo(ZPRStackTop, Align(16U));
-int64_t AArch64FrameLowering::estimateSVEStackObjectOffsets(
- MachineFrameInfo &MFI) const {
- int MinCSFrameIndex, MaxCSFrameIndex;
- return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex, false);
-}
+ if (AssignOffsets == AssignObjectOffsets::Yes)
+ AFI->setStackSizeSVE(SVEStack.ZPRStackSize, SVEStack.PPRStackSize);
-int64_t AArch64FrameLowering::assignSVEStackObjectOffsets(
- MachineFrameInfo &MFI, int &MinCSFrameIndex, int &MaxCSFrameIndex) const {
- return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex,
- true);
+ return SVEStack;
}
/// Attempts to scavenge a register from \p ScavengeableRegs given the used
@@ -3070,12 +3272,7 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown &&
"Upwards growing stack unsupported");
- int MinCSFrameIndex, MaxCSFrameIndex;
- int64_t SVEStackSize =
- assignSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex);
-
- AFI->setStackSizeSVE(alignTo(SVEStackSize, 16U));
- AFI->setMinMaxSVECSFrameIndex(MinCSFrameIndex, MaxCSFrameIndex);
+ (void)determineSVEStackSizes(MF, AssignObjectOffsets::Yes);
// If this function isn't doing Win64-style C++ EH, we don't need to do
// anything.
@@ -3359,7 +3556,8 @@ void TagStoreEdit::emitCode(MachineBasicBlock::iterator &InsertI,
Register Reg;
FrameRegOffset = TFI->resolveFrameOffsetReference(
- *MF, FirstTagStore.Offset, false /*isFixed*/, false /*isSVE*/, Reg,
+ *MF, FirstTagStore.Offset, false /*isFixed*/,
+ TargetStackID::Default /*StackID*/, Reg,
/*PreferFP=*/false, /*ForSimm=*/true);
FrameReg = Reg;
FrameRegUpdate = std::nullopt;
@@ -3597,7 +3795,7 @@ StackOffset AArch64FrameLowering::getFrameIndexReferencePreferSP(
// Go to common code if we cannot provide sp + offset.
if (MFI.hasVarSizedObjects() ||
- MF.getInfo<AArch64FunctionInfo>()->getStackSizeSVE() ||
+ MF.getInfo<AArch64FunctionInfo>()->hasSVEStackSize() ||
MF.getSubtarget().getRegisterInfo()->hasStackRealignment(MF))
return getFrameIndexReference(MF, FI, FrameReg);
@@ -3699,10 +3897,12 @@ bool FrameObjectCompare(const FrameObject &A, const FrameObject &B) {
void AArch64FrameLowering::orderFrameObjects(
const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
- if (!OrderFrameObjects || ObjectsToAllocate.empty())
+ const AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
+
+ if ((!OrderFrameObjects && !AFI.hasSplitSVEObjects()) ||
+ ObjectsToAllocate.empty())
return;
- const AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
const MachineFrameInfo &MFI = MF.getFrameInfo();
std::vector<FrameObject> FrameObjects(MFI.getObjectIndexEnd());
for (auto &Obj : ObjectsToAllocate) {
@@ -4080,7 +4280,7 @@ void AArch64FrameLowering::emitRemarks(
}
unsigned RegTy = StackAccess::AccessType::GPR;
- if (MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector) {
+ if (MFI.hasScalableStackID(FrameIdx)) {
// SPILL_PPR_TO_ZPR_SLOT_PSEUDO and FILL_PPR_FROM_ZPR_SLOT_PSEUDO
// spill/fill the predicate as a data vector (so are an FPR access).
if (MI.getOpcode() != AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO &&
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index 7bba053..32a9bd8 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -24,6 +24,11 @@ class AArch64FunctionInfo;
class AArch64PrologueEmitter;
class AArch64EpilogueEmitter;
+struct SVEStackSizes {
+ uint64_t ZPRStackSize{0};
+ uint64_t PPRStackSize{0};
+};
+
class AArch64FrameLowering : public TargetFrameLowering {
public:
explicit AArch64FrameLowering()
@@ -64,8 +69,9 @@ public:
bool ForSimm) const;
StackOffset resolveFrameOffsetReference(const MachineFunction &MF,
int64_t ObjectOffset, bool isFixed,
- bool isSVE, Register &FrameReg,
- bool PreferFP, bool ForSimm) const;
+ TargetStackID::Value StackID,
+ Register &FrameReg, bool PreferFP,
+ bool ForSimm) const;
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
ArrayRef<CalleeSavedInfo> CSI,
@@ -124,6 +130,7 @@ public:
return false;
case TargetStackID::Default:
case TargetStackID::ScalableVector:
+ case TargetStackID::ScalablePredicateVector:
case TargetStackID::NoAlloc:
return true;
}
@@ -132,7 +139,8 @@ public:
bool isStackIdSafeForLocalArea(unsigned StackId) const override {
// We don't support putting SVE objects into the pre-allocated local
// frame block at the moment.
- return StackId != TargetStackID::ScalableVector;
+ return (StackId != TargetStackID::ScalableVector &&
+ StackId != TargetStackID::ScalablePredicateVector);
}
void
@@ -145,7 +153,17 @@ public:
bool requiresSaveVG(const MachineFunction &MF) const;
- StackOffset getSVEStackSize(const MachineFunction &MF) const;
+ /// Returns the size of the entire ZPR stack frame (callee-saves + spills).
+ StackOffset getZPRStackSize(const MachineFunction &MF) const;
+
+ /// Returns the size of the entire PPR stack frame (callee-saves + spills +
+ /// hazard padding).
+ StackOffset getPPRStackSize(const MachineFunction &MF) const;
+
+ /// Returns the size of the entire SVE stackframe (PPRs + ZPRs).
+ StackOffset getSVEStackSize(const MachineFunction &MF) const {
+ return getZPRStackSize(MF) + getPPRStackSize(MF);
+ }
friend class AArch64PrologueEpilogueCommon;
friend class AArch64PrologueEmitter;
@@ -165,10 +183,6 @@ private:
/// Returns true if CSRs should be paired.
bool producePairRegisters(MachineFunction &MF) const;
- int64_t estimateSVEStackObjectOffsets(MachineFrameInfo &MF) const;
- int64_t assignSVEStackObjectOffsets(MachineFrameInfo &MF,
- int &MinCSFrameIndex,
- int &MaxCSFrameIndex) const;
/// Make a determination whether a Hazard slot is used and create it if
/// needed.
void determineStackHazardSlot(MachineFunction &MF,
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 6a1b06e..e7b2d20 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -2089,7 +2089,8 @@ void AArch64DAGToDAGISel::SelectMultiVectorLutiLane(SDNode *Node,
if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
return;
- SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4)};
+ SDValue Chain = Node->getOperand(0);
+ SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4), Chain};
SDLoc DL(Node);
EVT VT = Node->getValueType(0);
@@ -2110,14 +2111,15 @@ void AArch64DAGToDAGISel::SelectMultiVectorLutiLane(SDNode *Node,
void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
unsigned NumOutVecs,
unsigned Opc) {
-
SDValue ZtValue;
- SmallVector<SDValue, 4> Ops;
if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
return;
- Ops.push_back(ZtValue);
- Ops.push_back(createZMulTuple({Node->getOperand(3), Node->getOperand(4)}));
+ SDValue Chain = Node->getOperand(0);
+ SDValue Ops[] = {ZtValue,
+ createZMulTuple({Node->getOperand(3), Node->getOperand(4)}),
+ Chain};
+
SDLoc DL(Node);
EVT VT = Node->getValueType(0);
@@ -7495,7 +7497,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
int FI = cast<FrameIndexSDNode>(N)->getIndex();
// We can only encode VL scaled offsets, so only fold in frame indexes
// referencing SVE objects.
- if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
+ if (MFI.hasScalableStackID(FI)) {
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
return true;
@@ -7541,7 +7543,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
// We can only encode VL scaled offsets, so only fold in frame indexes
// referencing SVE objects.
- if (MFI.getStackID(FI) == TargetStackID::ScalableVector)
+ if (MFI.hasScalableStackID(FI))
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 45f5235..70d5ad7d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1537,6 +1537,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
+ setOperationAction(ISD::MSTORE, VT, Legal);
setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::MULHS, VT, Custom);
setOperationAction(ISD::MULHU, VT, Custom);
@@ -6617,7 +6618,6 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
"llvm.eh.recoverfp must take a function as the first argument");
return IncomingFPOp;
}
-
case Intrinsic::aarch64_neon_vsri:
case Intrinsic::aarch64_neon_vsli:
case Intrinsic::aarch64_sve_sri:
@@ -9256,8 +9256,7 @@ void AArch64TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
(MI.getOpcode() == AArch64::ADDXri ||
MI.getOpcode() == AArch64::SUBXri)) {
const MachineOperand &MO = MI.getOperand(1);
- if (MO.isFI() && MF.getFrameInfo().getStackID(MO.getIndex()) ==
- TargetStackID::ScalableVector)
+ if (MO.isFI() && MF.getFrameInfo().hasScalableStackID(MO.getIndex()))
MI.addOperand(MachineOperand::CreateReg(AArch64::VG, /*IsDef=*/false,
/*IsImplicit=*/true));
}
@@ -9704,8 +9703,12 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
Align Alignment = DAG.getDataLayout().getPrefTypeAlign(Ty);
MachineFrameInfo &MFI = MF.getFrameInfo();
int FI = MFI.CreateStackObject(StoreSize, Alignment, false);
- if (isScalable)
- MFI.setStackID(FI, TargetStackID::ScalableVector);
+ if (isScalable) {
+ bool IsPred = VA.getValVT() == MVT::aarch64svcount ||
+ VA.getValVT().getVectorElementType() == MVT::i1;
+ MFI.setStackID(FI, IsPred ? TargetStackID::ScalablePredicateVector
+ : TargetStackID::ScalableVector);
+ }
MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
SDValue Ptr = DAG.getFrameIndex(
@@ -15154,9 +15157,7 @@ static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
: Shift.getOperand(1);
unsigned Inst = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
- SDValue ResultSLI = DAG.getNode(Inst, DL, VT, X, Y, Imm);
-
- return ResultSLI;
+ return DAG.getNode(Inst, DL, VT, X, Y, Imm);
}
static SDValue tryLowerToBSL(SDValue N, SelectionDAG &DAG) {
@@ -27234,6 +27235,21 @@ static bool isLanes1toNKnownZero(SDValue Op) {
}
}
+// Return true if the vector operation can guarantee that the first lane of its
+// result is active.
+static bool isLane0KnownActive(SDValue Op) {
+ switch (Op.getOpcode()) {
+ default:
+ return false;
+ case AArch64ISD::REINTERPRET_CAST:
+ return isLane0KnownActive(Op->getOperand(0));
+ case ISD::SPLAT_VECTOR:
+ return isOneConstant(Op.getOperand(0));
+ case AArch64ISD::PTRUE:
+ return Op.getConstantOperandVal(0) == AArch64SVEPredPattern::all;
+ }
+}
+
static SDValue removeRedundantInsertVectorElt(SDNode *N) {
assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT && "Unexpected node!");
SDValue InsertVec = N->getOperand(0);
@@ -27519,6 +27535,32 @@ static SDValue performMULLCombine(SDNode *N,
return SDValue();
}
+static SDValue performPTestFirstCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG) {
+ if (DCI.isBeforeLegalize())
+ return SDValue();
+
+ SDLoc DL(N);
+ auto Mask = N->getOperand(0);
+ auto Pred = N->getOperand(1);
+
+ if (!isLane0KnownActive(Mask))
+ return SDValue();
+
+ if (Pred->getOpcode() == AArch64ISD::REINTERPRET_CAST)
+ Pred = Pred->getOperand(0);
+
+ if (Pred->getOpcode() == ISD::CONCAT_VECTORS) {
+ Pred = Pred->getOperand(0);
+ Pred = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, MVT::nxv16i1, Pred);
+ return DAG.getNode(AArch64ISD::PTEST_FIRST, DL, N->getValueType(0), Mask,
+ Pred);
+ }
+
+ return SDValue();
+}
+
static SDValue
performScalarToVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
@@ -27875,6 +27917,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
case AArch64ISD::UMULL:
case AArch64ISD::PMULL:
return performMULLCombine(N, DCI, DAG);
+ case AArch64ISD::PTEST_FIRST:
+ return performPTestFirstCombine(N, DCI, DAG);
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN:
switch (N->getConstantOperandVal(1)) {
@@ -29564,7 +29608,7 @@ void AArch64TargetLowering::finalizeLowering(MachineFunction &MF) const {
// than doing it here in finalizeLowering.
if (MFI.hasStackProtectorIndex()) {
for (unsigned int i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
- if (MFI.getStackID(i) == TargetStackID::ScalableVector &&
+ if (MFI.hasScalableStackID(i) &&
MFI.getObjectSSPLayout(i) != MachineFrameInfo::SSPLK_None) {
MFI.setStackID(MFI.getStackProtectorIndex(),
TargetStackID::ScalableVector);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index f07d351..6ef0a95 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -10176,28 +10176,6 @@ multiclass SIMDScalarLShiftBHSD<bit U, bits<5> opc, string asm,
(!cast<Instruction>(NAME # "d") FPR64:$Rn, vecshiftL64:$imm)>;
}
-multiclass SIMDScalarRShiftBHSD<bit U, bits<5> opc, string asm> {
- def b : BaseSIMDScalarShift<U, opc, {0,0,0,1,?,?,?},
- FPR8, FPR8, vecshiftR8, asm, []> {
- let Inst{18-16} = imm{2-0};
- }
-
- def h : BaseSIMDScalarShift<U, opc, {0,0,1,?,?,?,?},
- FPR16, FPR16, vecshiftR16, asm, []> {
- let Inst{19-16} = imm{3-0};
- }
-
- def s : BaseSIMDScalarShift<U, opc, {0,1,?,?,?,?,?},
- FPR32, FPR32, vecshiftR32, asm, []> {
- let Inst{20-16} = imm{4-0};
- }
-
- def d : BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?},
- FPR64, FPR64, vecshiftR64, asm, []> {
- let Inst{21-16} = imm{5-0};
- }
-}
-
//----------------------------------------------------------------------------
// AdvSIMD vector x indexed element
//----------------------------------------------------------------------------
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 5a51c81..5a90da1 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1503,6 +1503,13 @@ AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
getElementSizeForOpcode(PredOpcode))
return PredOpcode;
+ // For PTEST_FIRST(PTRUE_ALL, WHILE), the PTEST_FIRST is redundant since
+ // WHILEcc performs an implicit PTEST with an all active mask, setting
+ // the N flag as the PTEST_FIRST would.
+ if (PTest->getOpcode() == AArch64::PTEST_PP_FIRST &&
+ isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31)
+ return PredOpcode;
+
return {};
}
@@ -5592,7 +5599,7 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
"Unexpected register store without SVE store instructions");
Opc = AArch64::STR_PXI;
- StackID = TargetStackID::ScalableVector;
+ StackID = TargetStackID::ScalablePredicateVector;
}
break;
}
@@ -5607,7 +5614,7 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
Opc = AArch64::STRSui;
else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
Opc = AArch64::STR_PPXI;
- StackID = TargetStackID::ScalableVector;
+ StackID = TargetStackID::ScalablePredicateVector;
}
break;
case 8:
@@ -5777,7 +5784,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
if (IsPNR)
PNRReg = DestReg;
Opc = AArch64::LDR_PXI;
- StackID = TargetStackID::ScalableVector;
+ StackID = TargetStackID::ScalablePredicateVector;
}
break;
}
@@ -5792,7 +5799,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
Opc = AArch64::LDRSui;
else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
Opc = AArch64::LDR_PPXI;
- StackID = TargetStackID::ScalableVector;
+ StackID = TargetStackID::ScalablePredicateVector;
}
break;
case 8:
diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
index a81f5b3..b3c9656 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
@@ -23,12 +23,21 @@
using namespace llvm;
+static std::optional<uint64_t>
+getSVEStackSize(const AArch64FunctionInfo &MFI,
+ uint64_t (AArch64FunctionInfo::*GetStackSize)() const) {
+ if (!MFI.hasCalculatedStackSizeSVE())
+ return std::nullopt;
+ return (MFI.*GetStackSize)();
+}
+
yaml::AArch64FunctionInfo::AArch64FunctionInfo(
const llvm::AArch64FunctionInfo &MFI)
: HasRedZone(MFI.hasRedZone()),
- StackSizeSVE(MFI.hasCalculatedStackSizeSVE()
- ? std::optional<uint64_t>(MFI.getStackSizeSVE())
- : std::nullopt),
+ StackSizeZPR(
+ getSVEStackSize(MFI, &llvm::AArch64FunctionInfo::getStackSizeZPR)),
+ StackSizePPR(
+ getSVEStackSize(MFI, &llvm::AArch64FunctionInfo::getStackSizePPR)),
HasStackFrame(MFI.hasStackFrame()
? std::optional<bool>(MFI.hasStackFrame())
: std::nullopt) {}
@@ -41,8 +50,9 @@ void AArch64FunctionInfo::initializeBaseYamlFields(
const yaml::AArch64FunctionInfo &YamlMFI) {
if (YamlMFI.HasRedZone)
HasRedZone = YamlMFI.HasRedZone;
- if (YamlMFI.StackSizeSVE)
- setStackSizeSVE(*YamlMFI.StackSizeSVE);
+ if (YamlMFI.StackSizeZPR || YamlMFI.StackSizePPR)
+ setStackSizeSVE(YamlMFI.StackSizeZPR.value_or(0),
+ YamlMFI.StackSizePPR.value_or(0));
if (YamlMFI.HasStackFrame)
setHasStackFrame(*YamlMFI.HasStackFrame);
}
diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index 897c7e8..91e64e6 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -74,13 +74,10 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
/// Amount of stack frame size, not including callee-saved registers.
uint64_t LocalStackSize = 0;
- /// The start and end frame indices for the SVE callee saves.
- int MinSVECSFrameIndex = 0;
- int MaxSVECSFrameIndex = 0;
-
/// Amount of stack frame size used for saving callee-saved registers.
unsigned CalleeSavedStackSize = 0;
- unsigned SVECalleeSavedStackSize = 0;
+ unsigned ZPRCalleeSavedStackSize = 0;
+ unsigned PPRCalleeSavedStackSize = 0;
bool HasCalleeSavedStackSize = false;
bool HasSVECalleeSavedStackSize = false;
@@ -137,9 +134,14 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
/// SVE stack sizes (for predicates and data vectors) are maintained here
/// rather than in FrameInfo, as the placement and Stack IDs are target
/// specific.
- uint64_t StackSizeSVE = 0;
+ uint64_t StackSizeZPR = 0;
+ uint64_t StackSizePPR = 0;
+
+ /// Whether SVE objects (vectors and predicates) are split into separate
+ /// regions on the stack.
+ bool SplitSVEObjects = false;
- /// HasCalculatedStackSizeSVE indicates whether StackSizeSVE is valid.
+ /// HasCalculatedStackSizeSVE indicates whether StackSizeZPR/PPR is valid.
bool HasCalculatedStackSizeSVE = false;
/// Has a value when it is known whether or not the function uses a
@@ -312,16 +314,25 @@ public:
TailCallReservedStack = bytes;
}
- bool hasCalculatedStackSizeSVE() const { return HasCalculatedStackSizeSVE; }
-
- void setStackSizeSVE(uint64_t S) {
+ void setStackSizeSVE(uint64_t ZPR, uint64_t PPR) {
+ StackSizeZPR = ZPR;
+ StackSizePPR = PPR;
HasCalculatedStackSizeSVE = true;
- StackSizeSVE = S;
}
- uint64_t getStackSizeSVE() const {
+ uint64_t getStackSizeZPR() const {
+ assert(hasCalculatedStackSizeSVE());
+ return StackSizeZPR;
+ }
+ uint64_t getStackSizePPR() const {
assert(hasCalculatedStackSizeSVE());
- return StackSizeSVE;
+ return StackSizePPR;
+ }
+
+ bool hasCalculatedStackSizeSVE() const { return HasCalculatedStackSizeSVE; }
+
+ bool hasSVEStackSize() const {
+ return getStackSizeZPR() > 0 || getStackSizePPR() > 0;
}
bool hasStackFrame() const { return HasStackFrame; }
@@ -329,7 +340,6 @@ public:
bool isStackRealigned() const { return StackRealigned; }
void setStackRealigned(bool s) { StackRealigned = s; }
-
bool hasCalleeSaveStackFreeSpace() const {
return CalleeSaveStackHasFreeSpace;
}
@@ -414,29 +424,37 @@ public:
}
// Saves the CalleeSavedStackSize for SVE vectors in 'scalable bytes'
- void setSVECalleeSavedStackSize(unsigned Size) {
- SVECalleeSavedStackSize = Size;
+ void setSVECalleeSavedStackSize(unsigned ZPR, unsigned PPR) {
+ ZPRCalleeSavedStackSize = ZPR;
+ PPRCalleeSavedStackSize = PPR;
HasSVECalleeSavedStackSize = true;
}
- unsigned getSVECalleeSavedStackSize() const {
+ unsigned getZPRCalleeSavedStackSize() const {
assert(HasSVECalleeSavedStackSize &&
- "SVECalleeSavedStackSize has not been calculated");
- return SVECalleeSavedStackSize;
+ "ZPRCalleeSavedStackSize has not been calculated");
+ return ZPRCalleeSavedStackSize;
}
-
- void setMinMaxSVECSFrameIndex(int Min, int Max) {
- MinSVECSFrameIndex = Min;
- MaxSVECSFrameIndex = Max;
+ unsigned getPPRCalleeSavedStackSize() const {
+ assert(HasSVECalleeSavedStackSize &&
+ "PPRCalleeSavedStackSize has not been calculated");
+ return PPRCalleeSavedStackSize;
}
- int getMinSVECSFrameIndex() const { return MinSVECSFrameIndex; }
- int getMaxSVECSFrameIndex() const { return MaxSVECSFrameIndex; }
+ unsigned getSVECalleeSavedStackSize() const {
+ assert(!hasSplitSVEObjects() &&
+ "ZPRs and PPRs are split. Use get[ZPR|PPR]CalleeSavedStackSize()");
+ return getZPRCalleeSavedStackSize() + getPPRCalleeSavedStackSize();
+ }
void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamicTLSAccesses; }
unsigned getNumLocalDynamicTLSAccesses() const {
return NumLocalDynamicTLSAccesses;
}
+ bool isStackHazardIncludedInCalleeSaveArea() const {
+ return hasStackHazardSlotIndex() && !hasSplitSVEObjects();
+ }
+
std::optional<bool> hasRedZone() const { return HasRedZone; }
void setHasRedZone(bool s) { HasRedZone = s; }
@@ -472,6 +490,15 @@ public:
StackHazardCSRSlotIndex = Index;
}
+ bool hasSplitSVEObjects() const { return SplitSVEObjects; }
+ void setSplitSVEObjects(bool s) { SplitSVEObjects = s; }
+
+ bool hasSVE_AAPCS(const MachineFunction &MF) const {
+ return hasSplitSVEObjects() || isSVECC() ||
+ MF.getFunction().getCallingConv() ==
+ CallingConv::AArch64_SVE_VectorCall;
+ }
+
SMEAttrs getSMEFnAttrs() const { return SMEFnAttrs; }
unsigned getSRetReturnReg() const { return SRetReturnReg; }
@@ -611,7 +638,8 @@ private:
namespace yaml {
struct AArch64FunctionInfo final : public yaml::MachineFunctionInfo {
std::optional<bool> HasRedZone;
- std::optional<uint64_t> StackSizeSVE;
+ std::optional<uint64_t> StackSizeZPR;
+ std::optional<uint64_t> StackSizePPR;
std::optional<bool> HasStackFrame;
AArch64FunctionInfo() = default;
@@ -624,7 +652,8 @@ struct AArch64FunctionInfo final : public yaml::MachineFunctionInfo {
template <> struct MappingTraits<AArch64FunctionInfo> {
static void mapping(IO &YamlIO, AArch64FunctionInfo &MFI) {
YamlIO.mapOptional("hasRedZone", MFI.HasRedZone);
- YamlIO.mapOptional("stackSizeSVE", MFI.StackSizeSVE);
+ YamlIO.mapOptional("stackSizeZPR", MFI.StackSizeZPR);
+ YamlIO.mapOptional("stackSizePPR", MFI.StackSizePPR);
YamlIO.mapOptional("hasStackFrame", MFI.HasStackFrame);
}
};
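With these mappings the two sizes round-trip through MIR as separate keys; a hypothetical machineFunctionInfo block would serialize along these lines:

    machineFunctionInfo:
      hasRedZone:    false
      stackSizeZPR:  32
      stackSizePPR:  2
      hasStackFrame: true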
diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
index 09b3643..aed137c 100644
--- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
@@ -48,21 +48,19 @@ bool AArch64PrologueEpilogueCommon::isVGInstruction(
return Opc == TargetOpcode::COPY;
}
-// Convenience function to determine whether I is an SVE callee save.
-static bool isSVECalleeSave(MachineBasicBlock::iterator I) {
+// Convenience function to determine whether I is part of the ZPR callee saves.
+static bool isPartOfZPRCalleeSaves(MachineBasicBlock::iterator I) {
switch (I->getOpcode()) {
default:
return false;
- case AArch64::PTRUE_C_B:
case AArch64::LD1B_2Z_IMM:
case AArch64::ST1B_2Z_IMM:
case AArch64::STR_ZXI:
- case AArch64::STR_PXI:
case AArch64::LDR_ZXI:
- case AArch64::LDR_PXI:
- case AArch64::PTRUE_B:
case AArch64::CPY_ZPzI_B:
case AArch64::CMPNE_PPzZI_B:
+ case AArch64::PTRUE_C_B:
+ case AArch64::PTRUE_B:
return I->getFlag(MachineInstr::FrameSetup) ||
I->getFlag(MachineInstr::FrameDestroy);
case AArch64::SEH_SavePReg:
@@ -71,6 +69,23 @@ static bool isSVECalleeSave(MachineBasicBlock::iterator I) {
}
}
+// Convenience function to determine whether I is part of the PPR callee saves.
+static bool isPartOfPPRCalleeSaves(MachineBasicBlock::iterator I) {
+ switch (I->getOpcode()) {
+ default:
+ return false;
+ case AArch64::STR_PXI:
+ case AArch64::LDR_PXI:
+ return I->getFlag(MachineInstr::FrameSetup) ||
+ I->getFlag(MachineInstr::FrameDestroy);
+ }
+}
+
+// Convenience function to determine whether I is part of the SVE callee saves.
+static bool isPartOfSVECalleeSaves(MachineBasicBlock::iterator I) {
+ return isPartOfZPRCalleeSaves(I) || isPartOfPPRCalleeSaves(I);
+}
+
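
The prologue below walks the save instructions once: it first consumes the PPR
saves, then the ZPR saves, so the end of one region doubles as the start of the
next. A minimal standalone sketch of that partitioning, assuming a simplified
instruction stream (hypothetical opcodes, not LLVM code):

  #include <cassert>
  #include <cstdio>
  #include <vector>

  enum class Op { StorePPR, StoreZPR, Other };

  static bool isPartOfPPRSaves(Op O) { return O == Op::StorePPR; }
  static bool isPartOfZPRSaves(Op O) { return O == Op::StoreZPR; }

  int main() {
    // PPR saves come first, then ZPR saves, then the rest of the prologue.
    std::vector<Op> Insts = {Op::StorePPR, Op::StorePPR, Op::StoreZPR,
                             Op::Other};
    size_t I = 0, PPRBegin = I;
    while (I < Insts.size() && isPartOfPPRSaves(Insts[I]))
      ++I;
    size_t PPREnd = I, ZPRBegin = I; // One boundary serves both regions.
    while (I < Insts.size() && isPartOfZPRSaves(Insts[I]))
      ++I;
    size_t ZPREnd = I;
    assert(PPRBegin == 0 && PPREnd == 2 && ZPRBegin == 2 && ZPREnd == 3);
    std::printf("PPR saves: [%zu,%zu)  ZPR saves: [%zu,%zu)\n", PPRBegin,
                PPREnd, ZPRBegin, ZPREnd);
  }
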
AArch64PrologueEpilogueCommon::AArch64PrologueEpilogueCommon(
MachineFunction &MF, MachineBasicBlock &MBB,
const AArch64FrameLowering &AFL)
@@ -316,7 +331,7 @@ bool AArch64PrologueEpilogueCommon::shouldCombineCSRLocalStackBump(
// When there is an SVE area on the stack, always allocate the
// callee-saves and spills/locals separately.
- if (AFL.getSVEStackSize(MF))
+ if (AFI->hasSVEStackSize())
return false;
return true;
@@ -639,7 +654,7 @@ void AArch64PrologueEmitter::emitPrologue() {
// Now allocate space for the GPR callee saves.
MachineBasicBlock::iterator MBBI = PrologueBeginI;
- while (MBBI != EndI && isSVECalleeSave(MBBI))
+ while (MBBI != EndI && isPartOfSVECalleeSaves(MBBI))
++MBBI;
FirstGPRSaveI = convertCalleeSaveRestoreToSPPrePostIncDec(
MBBI, DL, -AFI->getCalleeSavedStackSize(), EmitAsyncCFI);
@@ -669,7 +684,7 @@ void AArch64PrologueEmitter::emitPrologue() {
MachineBasicBlock::iterator AfterGPRSavesI = FirstGPRSaveI;
while (AfterGPRSavesI != EndI &&
AfterGPRSavesI->getFlag(MachineInstr::FrameSetup) &&
- !isSVECalleeSave(AfterGPRSavesI)) {
+ !isPartOfSVECalleeSaves(AfterGPRSavesI)) {
if (CombineSPBump &&
// Only fix-up frame-setup load/store instructions.
(!AFL.requiresSaveVG(MF) || !isVGInstruction(AfterGPRSavesI, TLI)))
@@ -700,56 +715,105 @@ void AArch64PrologueEmitter::emitPrologue() {
if (AFL.windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding))
emitWindowsStackProbe(AfterGPRSavesI, DL, NumBytes, RealignmentPadding);
- StackOffset SVEStackSize = AFL.getSVEStackSize(MF);
- StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize;
- MachineBasicBlock::iterator CalleeSavesEnd = AfterGPRSavesI;
+ StackOffset PPRCalleeSavesSize =
+ StackOffset::getScalable(AFI->getPPRCalleeSavedStackSize());
+ StackOffset ZPRCalleeSavesSize =
+ StackOffset::getScalable(AFI->getZPRCalleeSavedStackSize());
+ StackOffset SVECalleeSavesSize = PPRCalleeSavesSize + ZPRCalleeSavesSize;
+ StackOffset PPRLocalsSize = AFL.getPPRStackSize(MF) - PPRCalleeSavesSize;
+ StackOffset ZPRLocalsSize = AFL.getZPRStackSize(MF) - ZPRCalleeSavesSize;
+
+ std::optional<MachineBasicBlock::iterator> ZPRCalleeSavesBegin,
+ ZPRCalleeSavesEnd, PPRCalleeSavesBegin, PPRCalleeSavesEnd;
StackOffset CFAOffset =
StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes);
-
- // Process the SVE callee-saves to determine what space needs to be
- // allocated.
MachineBasicBlock::iterator AfterSVESavesI = AfterGPRSavesI;
- if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
- LLVM_DEBUG(dbgs() << "SVECalleeSavedStackSize = " << CalleeSavedSize
- << "\n");
- SVECalleeSavesSize = StackOffset::getScalable(CalleeSavedSize);
- SVELocalsSize = SVEStackSize - SVECalleeSavesSize;
- // Find callee save instructions in frame.
- // Note: With FPAfterSVECalleeSaves the callee saves have already been
- // allocated.
- if (!FPAfterSVECalleeSaves) {
- MachineBasicBlock::iterator CalleeSavesBegin = AfterGPRSavesI;
- assert(isSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction");
- while (isSVECalleeSave(AfterSVESavesI) &&
+ if (!FPAfterSVECalleeSaves) {
+ // Process the SVE callee-saves to find the starts/ends of the ZPR and PPR
+ // areas.
+ PPRCalleeSavesBegin = AfterGPRSavesI;
+ if (PPRCalleeSavesSize) {
+ LLVM_DEBUG(dbgs() << "PPRCalleeSavedStackSize = "
+ << PPRCalleeSavesSize.getScalable() << "\n");
+
+ assert(isPartOfPPRCalleeSaves(*PPRCalleeSavesBegin) &&
+ "Unexpected instruction");
+ while (isPartOfPPRCalleeSaves(AfterSVESavesI) &&
+ AfterSVESavesI != MBB.getFirstTerminator())
+ ++AfterSVESavesI;
+ }
+ PPRCalleeSavesEnd = ZPRCalleeSavesBegin = AfterSVESavesI;
+ if (ZPRCalleeSavesSize) {
+ LLVM_DEBUG(dbgs() << "ZPRCalleeSavedStackSize = "
+ << ZPRCalleeSavesSize.getScalable() << "\n");
+ assert(isPartOfZPRCalleeSaves(*ZPRCalleeSavesBegin) &&
+ "Unexpected instruction");
+ while (isPartOfZPRCalleeSaves(AfterSVESavesI) &&
AfterSVESavesI != MBB.getFirstTerminator())
++AfterSVESavesI;
- CalleeSavesEnd = AfterSVESavesI;
-
- StackOffset LocalsSize = SVELocalsSize + StackOffset::getFixed(NumBytes);
- // Allocate space for the callee saves (if any).
- allocateStackSpace(CalleeSavesBegin, 0, SVECalleeSavesSize,
- EmitAsyncCFI && !HasFP, CFAOffset,
- MFI.hasVarSizedObjects() || LocalsSize);
}
+ ZPRCalleeSavesEnd = AfterSVESavesI;
}
- CFAOffset += SVECalleeSavesSize;
if (EmitAsyncCFI)
- emitCalleeSavedSVELocations(CalleeSavesEnd);
-
- // Allocate space for the rest of the frame including SVE locals. Align the
- // stack as necessary.
- assert(!(AFL.canUseRedZone(MF) && NeedsRealignment) &&
- "Cannot use redzone with stack realignment");
- if (!AFL.canUseRedZone(MF)) {
- // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
- // the correct value here, as NumBytes also includes padding bytes,
- // which shouldn't be counted here.
- allocateStackSpace(CalleeSavesEnd, RealignmentPadding,
- SVELocalsSize + StackOffset::getFixed(NumBytes),
+ emitCalleeSavedSVELocations(AfterSVESavesI);
+
+ if (AFI->hasSplitSVEObjects()) {
+ assert(!FPAfterSVECalleeSaves &&
+ "Cannot use FPAfterSVECalleeSaves with aarch64-split-sve-objects");
+ assert(!AFL.canUseRedZone(MF) &&
+ "Cannot use redzone with aarch64-split-sve-objects");
+ // TODO: Handle HasWinCFI/NeedsWinCFI?
+ assert(!NeedsWinCFI &&
+ "WinCFI with aarch64-split-sve-objects is not supported");
+
+ // Split ZPR and PPR allocation.
+ // Allocate PPR callee saves
+ allocateStackSpace(*PPRCalleeSavesBegin, 0, PPRCalleeSavesSize,
+ EmitAsyncCFI && !HasFP, CFAOffset,
+ MFI.hasVarSizedObjects() || ZPRCalleeSavesSize ||
+ ZPRLocalsSize || PPRLocalsSize);
+ CFAOffset += PPRCalleeSavesSize;
+
+ // Allocate PPR locals + ZPR callee saves
+ assert(PPRCalleeSavesEnd == ZPRCalleeSavesBegin &&
+ "Expected ZPR callee saves after PPR locals");
+ allocateStackSpace(*PPRCalleeSavesEnd, RealignmentPadding,
+ PPRLocalsSize + ZPRCalleeSavesSize,
+ EmitAsyncCFI && !HasFP, CFAOffset,
+ MFI.hasVarSizedObjects() || ZPRLocalsSize);
+ CFAOffset += PPRLocalsSize + ZPRCalleeSavesSize;
+
+ // Allocate ZPR locals
+ allocateStackSpace(*ZPRCalleeSavesEnd, RealignmentPadding,
+ ZPRLocalsSize + StackOffset::getFixed(NumBytes),
EmitAsyncCFI && !HasFP, CFAOffset,
MFI.hasVarSizedObjects());
+ } else {
+ // Allocate space for the callee saves (if any).
+ StackOffset LocalsSize =
+ PPRLocalsSize + ZPRLocalsSize + StackOffset::getFixed(NumBytes);
+ if (!FPAfterSVECalleeSaves)
+ allocateStackSpace(AfterGPRSavesI, 0, SVECalleeSavesSize,
+ EmitAsyncCFI && !HasFP, CFAOffset,
+ MFI.hasVarSizedObjects() || LocalsSize);
+ CFAOffset += SVECalleeSavesSize;
+
+ // Allocate space for the rest of the frame including SVE locals. Align the
+ // stack as necessary.
+ assert(!(AFL.canUseRedZone(MF) && NeedsRealignment) &&
+ "Cannot use redzone with stack realignment");
+ if (!AFL.canUseRedZone(MF)) {
+ // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
+ // the correct value here, as NumBytes also includes padding bytes,
+ // which shouldn't be counted here.
+ StackOffset SVELocalsSize = PPRLocalsSize + ZPRLocalsSize;
+ allocateStackSpace(AfterSVESavesI, RealignmentPadding,
+ SVELocalsSize + StackOffset::getFixed(NumBytes),
+ EmitAsyncCFI && !HasFP, CFAOffset,
+ MFI.hasVarSizedObjects());
+ }
}
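
To make the CFA bookkeeping of the split path concrete, here is a standalone
model (not LLVM code) of the three SP adjustments above; the sizes are
hypothetical, and scalable bytes stand for byte counts multiplied by vscale at
runtime:

  #include <cstdio>

  struct Offset {
    long Fixed = 0;    // ordinary bytes
    long Scalable = 0; // bytes multiplied by vscale
    Offset operator+(Offset O) const {
      return {Fixed + O.Fixed, Scalable + O.Scalable};
    }
  };

  int main() {
    Offset PPRCalleeSaves{0, 4};  // e.g. two predicate registers
    Offset PPRLocals{0, 2};
    Offset ZPRCalleeSaves{0, 64}; // e.g. four Z registers
    Offset ZPRLocals{0, 32};
    Offset NonSVE{48, 0};         // non-SVE locals (NumBytes)

    Offset CFA{};
    CFA = CFA + PPRCalleeSaves;               // 1. PPR callee saves
    CFA = CFA + (PPRLocals + ZPRCalleeSaves); // 2. PPR locals + ZPR saves
    CFA = CFA + (ZPRLocals + NonSVE);         // 3. ZPR locals + fixed frame
    std::printf("CFA offset: %ld + vscale * %ld bytes\n", CFA.Fixed,
                CFA.Scalable);
  }
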
// If we need a base pointer, set it up here. It's whatever the value of the
@@ -796,7 +860,8 @@ void AArch64PrologueEmitter::emitPrologue() {
emitDefineCFAWithFP(AfterSVESavesI, FixedObject);
} else {
StackOffset TotalSize =
- SVEStackSize + StackOffset::getFixed((int64_t)MFI.getStackSize());
+ AFL.getSVEStackSize(MF) +
+ StackOffset::getFixed((int64_t)MFI.getStackSize());
CFIInstBuilder CFIBuilder(MBB, AfterSVESavesI, MachineInstr::FrameSetup);
CFIBuilder.insertCFIInst(
createDefCFA(RegInfo, /*FrameReg=*/AArch64::SP, /*Reg=*/AArch64::SP,
@@ -1165,7 +1230,7 @@ void AArch64PrologueEmitter::emitCalleeSavedGPRLocations(
CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
for (const auto &Info : CSI) {
unsigned FrameIdx = Info.getFrameIdx();
- if (MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector)
+ if (MFI.hasScalableStackID(FrameIdx))
continue;
assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
@@ -1191,8 +1256,10 @@ void AArch64PrologueEmitter::emitCalleeSavedSVELocations(
AFL.getOffsetOfLocalArea();
}
+ StackOffset PPRStackSize = AFL.getPPRStackSize(MF);
for (const auto &Info : CSI) {
- if (MFI.getStackID(Info.getFrameIdx()) != TargetStackID::ScalableVector)
+ int FI = Info.getFrameIdx();
+ if (!MFI.hasScalableStackID(FI))
continue;
// Not all unwinders may know about SVE registers, so assume the lowest
@@ -1203,9 +1270,13 @@ void AArch64PrologueEmitter::emitCalleeSavedSVELocations(
continue;
StackOffset Offset =
- StackOffset::getScalable(MFI.getObjectOffset(Info.getFrameIdx())) -
+ StackOffset::getScalable(MFI.getObjectOffset(FI)) -
StackOffset::getFixed(AFI->getCalleeSavedStackSize(MFI));
+ if (AFI->hasSplitSVEObjects() &&
+ MFI.getStackID(FI) == TargetStackID::ScalableVector)
+ Offset -= PPRStackSize;
+
CFIBuilder.insertCFIInst(
createCFAOffset(RegInfo, Reg, Offset, IncomingVGOffsetFromDefCFA));
}
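
Under the split layout the ZPR area sits below the whole PPR area, so the CFI
offset of a ZPR (ScalableVector stack ID) save is shifted down by the full PPR
stack size. A small arithmetic sketch with hypothetical sizes:

  #include <cstdio>

  int main() {
    long ObjectOffset = -16; // scalable offset of a ZPR callee-save slot
    long PPRStackSize = 6;   // scalable size of the PPR saves + locals
    // With split SVE objects, bias ZPR offsets by the PPR area that lies
    // between them and the CFA.
    std::printf("CFI offset: vscale * %ld bytes\n",
                ObjectOffset - PPRStackSize);
  }
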
@@ -1322,7 +1393,7 @@ void AArch64EpilogueEmitter::emitEpilogue() {
while (FirstGPRRestoreI != Begin) {
--FirstGPRRestoreI;
if (!FirstGPRRestoreI->getFlag(MachineInstr::FrameDestroy) ||
- (!FPAfterSVECalleeSaves && isSVECalleeSave(FirstGPRRestoreI))) {
+ (!FPAfterSVECalleeSaves && isPartOfSVECalleeSaves(FirstGPRRestoreI))) {
++FirstGPRRestoreI;
break;
} else if (CombineSPBump)
@@ -1346,7 +1417,9 @@ void AArch64EpilogueEmitter::emitEpilogue() {
if (HasFP && AFI->hasSwiftAsyncContext())
emitSwiftAsyncContextFramePointer(EpilogueEndI, DL);
- const StackOffset &SVEStackSize = AFL.getSVEStackSize(MF);
+ StackOffset ZPRStackSize = AFL.getZPRStackSize(MF);
+ StackOffset PPRStackSize = AFL.getPPRStackSize(MF);
+ StackOffset SVEStackSize = ZPRStackSize + PPRStackSize;
// If there is a single SP update, insert it before the ret and we're done.
if (CombineSPBump) {
@@ -1367,106 +1440,188 @@ void AArch64EpilogueEmitter::emitEpilogue() {
NumBytes -= PrologueSaveSize;
assert(NumBytes >= 0 && "Negative stack allocation size!?");
- // Process the SVE callee-saves to determine what space needs to be
- // deallocated.
- StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
- MachineBasicBlock::iterator RestoreBegin = FirstGPRRestoreI,
- RestoreEnd = FirstGPRRestoreI;
- if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
- if (FPAfterSVECalleeSaves)
- RestoreEnd = MBB.getFirstTerminator();
-
- RestoreBegin = std::prev(RestoreEnd);
- while (RestoreBegin != MBB.begin() &&
- isSVECalleeSave(std::prev(RestoreBegin)))
- --RestoreBegin;
-
- assert(isSVECalleeSave(RestoreBegin) &&
- isSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction");
-
- StackOffset CalleeSavedSizeAsOffset =
- StackOffset::getScalable(CalleeSavedSize);
- DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
- DeallocateAfter = CalleeSavedSizeAsOffset;
- }
-
- // Deallocate the SVE area.
- if (FPAfterSVECalleeSaves) {
- // If the callee-save area is before FP, restoring the FP implicitly
- // deallocates non-callee-save SVE allocations. Otherwise, deallocate
- // them explicitly.
- if (!AFI->isStackRealigned() && !MFI.hasVarSizedObjects()) {
- emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
- DeallocateBefore, TII, MachineInstr::FrameDestroy, false,
- NeedsWinCFI, &HasWinCFI);
+ if (!AFI->hasSplitSVEObjects()) {
+ // Process the SVE callee-saves to determine what space needs to be
+ // deallocated.
+ StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
+ MachineBasicBlock::iterator RestoreBegin = FirstGPRRestoreI,
+ RestoreEnd = FirstGPRRestoreI;
+ int64_t ZPRCalleeSavedSize = AFI->getZPRCalleeSavedStackSize();
+ int64_t PPRCalleeSavedSize = AFI->getPPRCalleeSavedStackSize();
+ int64_t SVECalleeSavedSize = ZPRCalleeSavedSize + PPRCalleeSavedSize;
+
+ if (SVECalleeSavedSize) {
+ if (FPAfterSVECalleeSaves)
+ RestoreEnd = MBB.getFirstTerminator();
+
+ RestoreBegin = std::prev(RestoreEnd);
+ while (RestoreBegin != MBB.begin() &&
+ isPartOfSVECalleeSaves(std::prev(RestoreBegin)))
+ --RestoreBegin;
+
+ assert(isPartOfSVECalleeSaves(RestoreBegin) &&
+ isPartOfSVECalleeSaves(std::prev(RestoreEnd)) &&
+ "Unexpected instruction");
+
+ StackOffset CalleeSavedSizeAsOffset =
+ StackOffset::getScalable(SVECalleeSavedSize);
+ DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
+ DeallocateAfter = CalleeSavedSizeAsOffset;
}
- // Deallocate callee-save non-SVE registers.
- emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
- StackOffset::getFixed(AFI->getCalleeSavedStackSize()), TII,
- MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
-
- // Deallocate fixed objects.
- emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
- StackOffset::getFixed(FixedObject), TII,
- MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
-
- // Deallocate callee-save SVE registers.
- emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
- DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
- NeedsWinCFI, &HasWinCFI);
- } else if (SVEStackSize) {
- int64_t SVECalleeSavedSize = AFI->getSVECalleeSavedStackSize();
- // If we have stack realignment or variable-sized objects we must use the
- // FP to restore SVE callee saves (as there is an unknown amount of
- // data/padding between the SP and SVE CS area).
- Register BaseForSVEDealloc =
- (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) ? AArch64::FP
- : AArch64::SP;
- if (SVECalleeSavedSize && BaseForSVEDealloc == AArch64::FP) {
- Register CalleeSaveBase = AArch64::FP;
- if (int64_t CalleeSaveBaseOffset =
- AFI->getCalleeSaveBaseToFrameRecordOffset()) {
- // If we have have an non-zero offset to the non-SVE CS base we need to
- // compute the base address by subtracting the offest in a temporary
- // register first (to avoid briefly deallocating the SVE CS).
- CalleeSaveBase =
- MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
- emitFrameOffset(MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP,
- StackOffset::getFixed(-CalleeSaveBaseOffset), TII,
- MachineInstr::FrameDestroy);
- }
- // The code below will deallocate the stack space space by moving the
- // SP to the start of the SVE callee-save area.
- emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,
- StackOffset::getScalable(-SVECalleeSavedSize), TII,
- MachineInstr::FrameDestroy);
- } else if (BaseForSVEDealloc == AArch64::SP) {
- if (SVECalleeSavedSize) {
- // Deallocate the non-SVE locals first before we can deallocate (and
- // restore callee saves) from the SVE area.
- emitFrameOffset(
- MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
- StackOffset::getFixed(NumBytes), TII, MachineInstr::FrameDestroy,
- false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
- SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize));
- NumBytes = 0;
+ // Deallocate the SVE area.
+ if (FPAfterSVECalleeSaves) {
+ // If the callee-save area is before FP, restoring the FP implicitly
+ // deallocates non-callee-save SVE allocations. Otherwise, deallocate
+ // them explicitly.
+ if (!AFI->isStackRealigned() && !MFI.hasVarSizedObjects()) {
+ emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
+ DeallocateBefore, TII, MachineInstr::FrameDestroy,
+ false, NeedsWinCFI, &HasWinCFI);
}
+ // Deallocate callee-save non-SVE registers.
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
- DeallocateBefore, TII, MachineInstr::FrameDestroy, false,
- NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
- SVEStackSize +
- StackOffset::getFixed(NumBytes + PrologueSaveSize));
+ StackOffset::getFixed(AFI->getCalleeSavedStackSize()),
+ TII, MachineInstr::FrameDestroy, false, NeedsWinCFI,
+ &HasWinCFI);
+
+ // Deallocate fixed objects.
+ emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
+ StackOffset::getFixed(FixedObject), TII,
+ MachineInstr::FrameDestroy, false, NeedsWinCFI,
+ &HasWinCFI);
+ // Deallocate callee-save SVE registers.
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
- NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
- DeallocateAfter +
- StackOffset::getFixed(NumBytes + PrologueSaveSize));
+ NeedsWinCFI, &HasWinCFI);
+ } else if (SVEStackSize) {
+ int64_t SVECalleeSavedSize = AFI->getSVECalleeSavedStackSize();
+ // If we have stack realignment or variable-sized objects we must use the
+ // FP to restore SVE callee saves (as there is an unknown amount of
+ // data/padding between the SP and SVE CS area).
+ Register BaseForSVEDealloc =
+ (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) ? AArch64::FP
+ : AArch64::SP;
+ if (SVECalleeSavedSize && BaseForSVEDealloc == AArch64::FP) {
+ Register CalleeSaveBase = AArch64::FP;
+ if (int64_t CalleeSaveBaseOffset =
+ AFI->getCalleeSaveBaseToFrameRecordOffset()) {
+          // If we have a non-zero offset to the non-SVE CS base we need
+          // to compute the base address by subtracting the offset in a
+          // temporary register first (to avoid briefly deallocating the SVE
+          // CS).
+ CalleeSaveBase = MBB.getParent()->getRegInfo().createVirtualRegister(
+ &AArch64::GPR64RegClass);
+ emitFrameOffset(MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP,
+ StackOffset::getFixed(-CalleeSaveBaseOffset), TII,
+ MachineInstr::FrameDestroy);
+ }
+        // The code below will deallocate the stack space by moving the
+        // SP to the start of the SVE callee-save area.
+ emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,
+ StackOffset::getScalable(-SVECalleeSavedSize), TII,
+ MachineInstr::FrameDestroy);
+ } else if (BaseForSVEDealloc == AArch64::SP) {
+ if (SVECalleeSavedSize) {
+          // Deallocate the non-SVE locals before we can deallocate (and
+          // restore callee saves) from the SVE area.
+ emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
+ StackOffset::getFixed(NumBytes), TII,
+ MachineInstr::FrameDestroy, false, NeedsWinCFI,
+ &HasWinCFI, EmitCFI && !HasFP,
+ SVEStackSize + StackOffset::getFixed(
+ NumBytes + PrologueSaveSize));
+ NumBytes = 0;
+ }
+
+ emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
+ DeallocateBefore, TII, MachineInstr::FrameDestroy,
+ false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
+ SVEStackSize +
+ StackOffset::getFixed(NumBytes + PrologueSaveSize));
+
+ emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
+ DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
+ NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
+ DeallocateAfter +
+ StackOffset::getFixed(NumBytes + PrologueSaveSize));
+ }
+
+ if (EmitCFI)
+ emitCalleeSavedSVERestores(RestoreEnd);
+ }
+ } else if (AFI->hasSplitSVEObjects() && SVEStackSize) {
+    // TODO: Support stack realignment and variable-sized objects.
+ assert(!AFI->isStackRealigned() && !MFI.hasVarSizedObjects() &&
+ "unexpected stack realignment or variable sized objects with split "
+ "SVE stack objects");
+    // With SplitSVEObjects, determine the sizes and starts/ends of the ZPR
+    // and PPR areas.
+ auto ZPRCalleeSavedSize =
+ StackOffset::getScalable(AFI->getZPRCalleeSavedStackSize());
+ auto PPRCalleeSavedSize =
+ StackOffset::getScalable(AFI->getPPRCalleeSavedStackSize());
+ StackOffset PPRLocalsSize = PPRStackSize - PPRCalleeSavedSize;
+ StackOffset ZPRLocalsSize = ZPRStackSize - ZPRCalleeSavedSize;
+
+ MachineBasicBlock::iterator PPRRestoreBegin = FirstGPRRestoreI,
+ PPRRestoreEnd = FirstGPRRestoreI;
+ if (PPRCalleeSavedSize) {
+ PPRRestoreBegin = std::prev(PPRRestoreEnd);
+ while (PPRRestoreBegin != MBB.begin() &&
+ isPartOfPPRCalleeSaves(std::prev(PPRRestoreBegin)))
+ --PPRRestoreBegin;
+ }
+
+ MachineBasicBlock::iterator ZPRRestoreBegin = PPRRestoreBegin,
+ ZPRRestoreEnd = PPRRestoreBegin;
+ if (ZPRCalleeSavedSize) {
+ ZPRRestoreBegin = std::prev(ZPRRestoreEnd);
+ while (ZPRRestoreBegin != MBB.begin() &&
+ isPartOfZPRCalleeSaves(std::prev(ZPRRestoreBegin)))
+ --ZPRRestoreBegin;
}
+
+ auto CFAOffset =
+ SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize);
+ if (PPRCalleeSavedSize || ZPRCalleeSavedSize) {
+      // Deallocate the non-SVE locals before we can deallocate (and
+      // restore callee saves) from the SVE area.
+ auto NonSVELocals = StackOffset::getFixed(NumBytes);
+ emitFrameOffset(MBB, ZPRRestoreBegin, DL, AArch64::SP, AArch64::SP,
+ NonSVELocals, TII, MachineInstr::FrameDestroy, false,
+ false, nullptr, EmitCFI && !HasFP, CFAOffset);
+ NumBytes = 0;
+ CFAOffset -= NonSVELocals;
+ }
+
+ if (ZPRLocalsSize) {
+ emitFrameOffset(MBB, ZPRRestoreBegin, DL, AArch64::SP, AArch64::SP,
+ ZPRLocalsSize, TII, MachineInstr::FrameDestroy, false,
+ false, nullptr, EmitCFI && !HasFP, CFAOffset);
+ CFAOffset -= ZPRLocalsSize;
+ }
+
+ if (PPRLocalsSize || ZPRCalleeSavedSize) {
+ assert(PPRRestoreBegin == ZPRRestoreEnd &&
+ "Expected PPR restores after ZPR");
+ emitFrameOffset(MBB, PPRRestoreBegin, DL, AArch64::SP, AArch64::SP,
+ PPRLocalsSize + ZPRCalleeSavedSize, TII,
+ MachineInstr::FrameDestroy, false, false, nullptr,
+ EmitCFI && !HasFP, CFAOffset);
+ CFAOffset -= PPRLocalsSize + ZPRCalleeSavedSize;
+ }
+ if (PPRCalleeSavedSize) {
+ emitFrameOffset(MBB, PPRRestoreEnd, DL, AArch64::SP, AArch64::SP,
+ PPRCalleeSavedSize, TII, MachineInstr::FrameDestroy,
+ false, false, nullptr, EmitCFI && !HasFP, CFAOffset);
+ }
+
+    // We only emit CFI information for ZPRs, so emit it after the ZPR
+    // restores.
if (EmitCFI)
- emitCalleeSavedSVERestores(RestoreEnd);
+ emitCalleeSavedSVERestores(ZPRRestoreEnd);
}
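
The deallocation order of the split path mirrors the prologue, with the CFA
offset shrinking after every SP adjustment. A standalone model (not LLVM code)
with the same hypothetical sizes as before:

  #include <cassert>

  int main() {
    long NonSVELocals = 48;                   // fixed bytes
    long ZPRLocals = 32, ZPRCalleeSaves = 64; // scalable bytes
    long PPRLocals = 2, PPRCalleeSaves = 4;   // scalable bytes

    long CFAFixed = NonSVELocals;
    long CFAScalable =
        ZPRLocals + ZPRCalleeSaves + PPRLocals + PPRCalleeSaves;
    CFAFixed -= NonSVELocals;                  // 1. non-SVE locals
    CFAScalable -= ZPRLocals;                  // 2. ZPR locals
    CFAScalable -= PPRLocals + ZPRCalleeSaves; // 3. PPR locals + ZPR saves
    CFAScalable -= PPRCalleeSaves;             // 4. PPR callee saves
    // Every allocated byte has been released by the final adjustment.
    assert(CFAFixed == 0 && CFAScalable == 0);
  }
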
if (!HasFP) {
@@ -1624,8 +1779,7 @@ void AArch64EpilogueEmitter::emitCalleeSavedRestores(
CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameDestroy);
for (const auto &Info : CSI) {
- if (SVE !=
- (MFI.getStackID(Info.getFrameIdx()) == TargetStackID::ScalableVector))
+ if (SVE != MFI.hasScalableStackID(Info.getFrameIdx()))
continue;
MCRegister Reg = Info.getReg();
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 2b0c8ad..79975b0 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -71,6 +71,7 @@ bool AArch64RegisterInfo::regNeedsCFI(MCRegister Reg,
const MCPhysReg *
AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
assert(MF && "Invalid MachineFunction pointer.");
+ auto &AFI = *MF->getInfo<AArch64FunctionInfo>();
if (MF->getFunction().getCallingConv() == CallingConv::GHC)
// GHC set of callee saved regs is empty as all those regs are
@@ -101,10 +102,7 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_Win_AArch64_AAPCS_SwiftTail_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::AArch64_VectorCall)
return CSR_Win_AArch64_AAVPCS_SaveList;
- if (MF->getFunction().getCallingConv() ==
- CallingConv::AArch64_SVE_VectorCall)
- return CSR_Win_AArch64_SVE_AAPCS_SaveList;
- if (MF->getInfo<AArch64FunctionInfo>()->isSVECC())
+ if (AFI.hasSVE_AAPCS(*MF))
return CSR_Win_AArch64_SVE_AAPCS_SaveList;
return CSR_Win_AArch64_AAPCS_SaveList;
}
@@ -148,7 +146,7 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
// This is for OSes other than Windows; Windows is a separate case further
// above.
return CSR_AArch64_AAPCS_X18_SaveList;
- if (MF->getInfo<AArch64FunctionInfo>()->isSVECC())
+ if (AFI.hasSVE_AAPCS(*MF))
return CSR_AArch64_SVE_AAPCS_SaveList;
return CSR_AArch64_AAPCS_SaveList;
}
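
With this change, any of the three conditions folded into hasSVE_AAPCS()
routes the function to the SVE AAPCS save list; notably, split SVE objects
alone is now sufficient. A standalone sketch of the selection (not LLVM code):

  enum class SaveList { AAPCS, SVE_AAPCS };

  static SaveList pickSaveList(bool SplitSVEObjects, bool SVECC,
                               bool SVEVectorCallCC) {
    bool HasSVE_AAPCS = SplitSVEObjects || SVECC || SVEVectorCallCC;
    return HasSVE_AAPCS ? SaveList::SVE_AAPCS : SaveList::AAPCS;
  }

  int main() {
    // Split SVE objects alone selects the SVE save list.
    return pickSaveList(true, false, false) == SaveList::SVE_AAPCS ? 0 : 1;
  }
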
@@ -158,6 +156,7 @@ AArch64RegisterInfo::getDarwinCalleeSavedRegs(const MachineFunction *MF) const {
assert(MF && "Invalid MachineFunction pointer.");
assert(MF->getSubtarget<AArch64Subtarget>().isTargetDarwin() &&
"Invalid subtarget for getDarwinCalleeSavedRegs");
+ auto &AFI = *MF->getInfo<AArch64FunctionInfo>();
if (MF->getFunction().getCallingConv() == CallingConv::CFGuard_Check)
report_fatal_error(
@@ -205,7 +204,7 @@ AArch64RegisterInfo::getDarwinCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_Darwin_AArch64_RT_AllRegs_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::Win64)
return CSR_Darwin_AArch64_AAPCS_Win64_SaveList;
- if (MF->getInfo<AArch64FunctionInfo>()->isSVECC())
+ if (AFI.hasSVE_AAPCS(*MF))
return CSR_Darwin_AArch64_SVE_AAPCS_SaveList;
return CSR_Darwin_AArch64_AAPCS_SaveList;
}
@@ -643,7 +642,7 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
if (ST.hasSVE() || ST.isStreaming()) {
// Frames that have variable sized objects and scalable SVE objects,
// should always use a basepointer.
- if (!AFI->hasCalculatedStackSizeSVE() || AFI->getStackSizeSVE())
+ if (!AFI->hasCalculatedStackSizeSVE() || AFI->hasSVEStackSize())
return true;
}
@@ -783,7 +782,7 @@ AArch64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
assert((!MF.getSubtarget<AArch64Subtarget>().hasSVE() ||
AFI->hasCalculatedStackSizeSVE()) &&
"Expected SVE area to be calculated by this point");
- return TFI.hasFP(MF) && !hasStackRealignment(MF) && !AFI->getStackSizeSVE() &&
+ return TFI.hasFP(MF) && !hasStackRealignment(MF) && !AFI->hasSVEStackSize() &&
!AFI->hasStackHazardSlotIndex();
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 7ee54c5..c197550e 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -438,7 +438,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2,
G_FLOG10, G_FTAN, G_FEXP, G_FEXP2, G_FEXP10,
G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH,
- G_FSINH, G_FTANH})
+ G_FSINH, G_FTANH, G_FMODF})
// We need a call for these, so we always need to scalarize.
.scalarize(0)
// Regardless of FP16 support, widen 16-bit elements to 32-bits.