aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib
diff options
context:
space:
mode:
authorSander de Smalen <sander.desmalen@arm.com>2019-08-15 10:34:16 +0000
committerSander de Smalen <sander.desmalen@arm.com>2019-08-15 10:34:16 +0000
commit643adb55769ecb12e0377dab60de50def15d2cea (patch)
tree6ba0efc4f1a82c02b203f8ad476331383b977704 /llvm/lib
parentde1d6c822079b3e7565bb3e48865318bee51761c (diff)
downloadllvm-643adb55769ecb12e0377dab60de50def15d2cea.zip
llvm-643adb55769ecb12e0377dab60de50def15d2cea.tar.gz
llvm-643adb55769ecb12e0377dab60de50def15d2cea.tar.bz2
[AArch64] Change location of frame-record within callee-save area.
This patch changes the location of the frame-record (FP, LR) to the bottom of the callee-saved area. According to the AAPCS the location of the frame-record within the stackframe is unspecified (section 5.2.3 The Frame Pointer), so the compiler should be free to choose a different location. The reason for changing the location of the frame-record is to prepare the frame for allocating an SVE area below the callee-saves. This way the compiler can use the VL-scaled addressing modes to directly access SVE objects from the frame-pointer. : : | stack | | stack | | args | | args | +-------+ +-------+ | x30 | | x19 | | x29 | | x20 | FP -> |- - - -| | x21 | | x19 | ==> | x22 | | x20 | |- - - -| | x21 | | x30 | | x22 | | x29 | +-------+ +-------+ <- FP |///////| |///////| // realignment gap |- - - -| |- - - -| |spills/| |spills/| | locals| | locals| SP -> +-------+ +-------+ <- SP Things to point out: - The algorithm to find a paired register should be prevented from accidentally pairing some callee-saved register with LR that is not FP, since they should always be paired together when the frame has a frame-record. - For Darwin platforms the location of the frame-record is unchanged, since the unwind encoding does not allow for encoding this position dynamically and other tools currently depend on the former layout. Reviewers: efriedma, rovka, rengolin, thegameg, greened, t.p.northover Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D65653 llvm-svn: 368987
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/AArch64/AArch64CallingConvention.td30
-rw-r--r--llvm/lib/Target/AArch64/AArch64FrameLowering.cpp79
-rw-r--r--llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp5
3 files changed, 88 insertions, 26 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index 2049198..95d3e4d1 100644
--- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -317,6 +317,12 @@ def CC_AArch64_GHC : CallingConv<[
CCIfType<[i64], CCAssignToReg<[X19, X20, X21, X22, X23, X24, X25, X26, X27, X28]>>
]>;
+// The order of the callee-saves in this file is important, because the
+// FrameLowering code will use this order to determine the layout the
+// callee-save area in the stack frame. As can be observed below, Darwin
+// requires the frame-record (LR, FP) to be at the top the callee-save area,
+// whereas for other platforms they are at the bottom.
+
// FIXME: LR is only callee-saved in the sense that *we* preserve it and are
// presumably a callee to someone. External functions may not do so, but this
// is currently safe since BL has LR as an implicit-def and what happens after a
@@ -325,7 +331,13 @@ def CC_AArch64_GHC : CallingConv<[
// It would be better to model its preservation semantics properly (create a
// vreg on entry, use it in RET & tail call generation; make that vreg def if we
// end up saving LR as part of a call frame). Watch this space...
-def CSR_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
+def CSR_AArch64_AAPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24,
+ X25, X26, X27, X28, LR, FP,
+ D8, D9, D10, D11,
+ D12, D13, D14, D15)>;
+
+// Darwin puts the frame-record at the top of the callee-save area.
+def CSR_Darwin_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
X23, X24, X25, X26, X27, X28,
D8, D9, D10, D11,
D12, D13, D14, D15)>;
@@ -333,21 +345,21 @@ def CSR_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
// Win64 has unwinding codes for an (FP,LR) pair, save_fplr and save_fplr_x.
// We put FP before LR, so that frame lowering logic generates (FP,LR) pairs,
// and not (LR,FP) pairs.
-def CSR_Win_AArch64_AAPCS : CalleeSavedRegs<(add FP, LR, X19, X20, X21, X22,
- X23, X24, X25, X26, X27, X28,
+def CSR_Win_AArch64_AAPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24,
+ X25, X26, X27, X28, FP, LR,
D8, D9, D10, D11,
D12, D13, D14, D15)>;
// AArch64 PCS for vector functions (VPCS)
// must (additionally) preserve full Q8-Q23 registers
-def CSR_AArch64_AAVPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
- X23, X24, X25, X26, X27, X28,
+def CSR_AArch64_AAVPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24,
+ X25, X26, X27, X28, LR, FP,
(sequence "Q%u", 8, 23))>;
// Functions taking SVE arguments or returning an SVE type
// must (additionally) preserve full Z8-Z23 and predicate registers P4-P15
-def CSR_AArch64_SVE_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
- X23, X24, X25, X26, X27, X28,
+def CSR_AArch64_SVE_AAPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24,
+ X25, X26, X27, X28, LR, FP,
(sequence "Z%u", 8, 23),
(sequence "P%u", 4, 15))>;
@@ -362,7 +374,7 @@ def CSR_AArch64_SVE_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
def CSR_AArch64_AAPCS_ThisReturn : CalleeSavedRegs<(add CSR_AArch64_AAPCS, X0)>;
def CSR_AArch64_AAPCS_SwiftError
- : CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X21)>;
+ : CalleeSavedRegs<(sub CSR_Darwin_AArch64_AAPCS, X21)>;
// The function used by Darwin to obtain the address of a thread-local variable
// guarantees more than a normal AAPCS function. x16 and x17 are used on the
@@ -378,7 +390,7 @@ def CSR_AArch64_TLS_Darwin
// fast path calls a function that follows CSR_AArch64_TLS_Darwin,
// CSR_AArch64_CXX_TLS_Darwin should be a subset of CSR_AArch64_TLS_Darwin.
def CSR_AArch64_CXX_TLS_Darwin
- : CalleeSavedRegs<(add CSR_AArch64_AAPCS,
+ : CalleeSavedRegs<(add CSR_Darwin_AArch64_AAPCS,
(sub (sequence "X%u", 1, 28), X15, X16, X17, X18),
(sequence "D%u", 0, 31))>;
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index cc7cbbb..e9cd1e1 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -44,11 +44,15 @@
// | |
// |-----------------------------------|
// | |
-// | prev_fp, prev_lr |
+// | callee-saved gpr registers | <--.
+// | | | On Darwin platforms these
+// |- - - - - - - - - - - - - - - - - -| | callee saves are swapped,
+// | | | (frame record first)
+// | prev_fp, prev_lr | <--'
// | (a.k.a. "frame record") |
// |-----------------------------------| <- fp(=x29)
// | |
-// | other callee-saved registers |
+// | callee-saved fp/simd/SVE regs |
// | |
// |-----------------------------------|
// |.empty.space.to.make.part.below....|
@@ -80,6 +84,20 @@
// * A frame pointer is definitely needed when there are local variables with
// more-than-default alignment requirements.
//
+// For Darwin platforms the frame-record (fp, lr) is stored at the top of the
+// callee-saved area, since the unwind encoding does not allow for encoding
+// this dynamically and existing tools depend on this layout. For other
+// platforms, the frame-record is stored at the bottom of the (gpr) callee-saved
+// area to allow SVE stack objects (allocated directly below the callee-saves,
+// if available) to be accessed directly from the framepointer.
+// The SVE spill/fill instructions have VL-scaled addressing modes such
+// as:
+// ldr z8, [fp, #-7 mul vl]
+// For SVE the size of the vector length (VL) is not known at compile-time, so
+// '#-7 mul vl' is an offset that can only be evaluated at runtime. With this
+// layout, we don't need to add an unscaled offset to the framepointer before
+// accessing the SVE object in the frame.
+//
// In some cases when a base pointer is not strictly needed, it is generated
// anyway when offsets from the frame pointer to access local variables become
// so large that the offset can't be encoded in the immediate fields of loads
@@ -793,6 +811,10 @@ static bool needsWinCFI(const MachineFunction &MF) {
F.needsUnwindTableEntry();
}
+static bool isTargetDarwin(const MachineFunction &MF) {
+ return MF.getSubtarget<AArch64Subtarget>().isTargetDarwin();
+}
+
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -952,9 +974,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
}
if (HasFP) {
- // Only set up FP if we actually need to. Frame pointer is fp =
- // sp - fixedobject - 16.
- int FPOffset = AFI->getCalleeSavedStackSize() - 16;
+ // Only set up FP if we actually need to.
+ int FPOffset = isTargetDarwin(MF) ? (AFI->getCalleeSavedStackSize() - 16) : 0;
+
if (CombineSPBump)
FPOffset += AFI->getLocalStackSize();
@@ -1136,7 +1158,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
if (needsFrameMoves) {
const DataLayout &TD = MF.getDataLayout();
- const int StackGrowth = -TD.getPointerSize(0);
+ const int StackGrowth = isTargetDarwin(MF)
+ ? (2 * -TD.getPointerSize(0))
+ : -AFI->getCalleeSavedStackSize();
Register FramePtr = RegInfo->getFrameRegister(MF);
// An example of the prologue:
//
@@ -1208,7 +1232,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// Define the current CFA rule to use the provided FP.
unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
- nullptr, Reg, 2 * StackGrowth - FixedObject));
+ nullptr, Reg, StackGrowth - FixedObject));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
@@ -1462,11 +1486,13 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// FIXME: Rather than doing the math here, we should instead just use
// non-post-indexed loads for the restores if we aren't actually going to
// be able to save any instructions.
- if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned()))
+ if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) {
+ int64_t OffsetToFrameRecord =
+ isTargetDarwin(MF) ? (-(int64_t)AFI->getCalleeSavedStackSize() + 16) : 0;
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
- {-(int64_t)AFI->getCalleeSavedStackSize() + 16, MVT::i8},
+ {OffsetToFrameRecord, MVT::i8},
TII, MachineInstr::FrameDestroy, false, NeedsWinCFI);
- else if (NumBytes)
+ } else if (NumBytes)
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
{NumBytes, MVT::i8}, TII, MachineInstr::FrameDestroy, false,
NeedsWinCFI);
@@ -1526,7 +1552,8 @@ static StackOffset getFPOffset(const MachineFunction &MF, int ObjectOffset) {
bool IsWin64 =
Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
- return {ObjectOffset + FixedObject + 16, MVT::i8};
+ unsigned FPAdjust = isTargetDarwin(MF) ? 16 : AFI->getCalleeSavedStackSize();
+ return {ObjectOffset + FixedObject + FPAdjust, MVT::i8};
}
static StackOffset getStackOffset(const MachineFunction &MF, int ObjectOffset) {
@@ -1689,6 +1716,23 @@ static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
return true;
}
+/// Returns true if Reg1 and Reg2 cannot be paired using a ldp/stp instruction.
+/// WindowsCFI requires that only consecutive registers can be paired.
+/// LR and FP need to be allocated together when the frame needs to save
+/// the frame-record. This means any other register pairing with LR is invalid.
+static bool invalidateRegisterPairing(unsigned Reg1, unsigned Reg2,
+ bool NeedsWinCFI, bool NeedsFrameRecord) {
+ if (NeedsWinCFI)
+ return invalidateWindowsRegisterPairing(Reg1, Reg2, true);
+
+ // If we need to store the frame record, don't pair any register
+ // with LR other than FP.
+ if (NeedsFrameRecord)
+ return Reg2 == AArch64::LR;
+
+ return false;
+}
+
namespace {
struct RegPairInfo {
@@ -1708,7 +1752,7 @@ struct RegPairInfo {
static void computeCalleeSaveRegisterPairs(
MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs,
- bool &NeedShadowCallStackProlog) {
+ bool &NeedShadowCallStackProlog, bool NeedsFrameRecord) {
if (CSI.empty())
return;
@@ -1750,7 +1794,8 @@ static void computeCalleeSaveRegisterPairs(
switch (RPI.Type) {
case RegPairInfo::GPR:
if (AArch64::GPR64RegClass.contains(NextReg) &&
- !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI))
+ !invalidateRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI,
+ NeedsFrameRecord))
RPI.Reg2 = NextReg;
break;
case RegPairInfo::FPR64:
@@ -1784,6 +1829,10 @@ static void computeCalleeSaveRegisterPairs(
(CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) &&
"Out of order callee saved regs!");
+ assert((!RPI.isPaired() || RPI.Reg2 != AArch64::FP ||
+ RPI.Reg1 == AArch64::LR) &&
+ "FrameRecord must be allocated together with LR");
+
// MachO's compact unwind format relies on all registers being stored in
// adjacent register pairs.
assert((!produceCompactUnwindFrame(MF) ||
@@ -1832,7 +1881,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
bool NeedShadowCallStackProlog = false;
computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs,
- NeedShadowCallStackProlog);
+ NeedShadowCallStackProlog, hasFP(MF));
const MachineRegisterInfo &MRI = MF.getRegInfo();
if (NeedShadowCallStackProlog) {
@@ -1962,7 +2011,7 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
bool NeedShadowCallStackProlog = false;
computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs,
- NeedShadowCallStackProlog);
+ NeedShadowCallStackProlog, hasFP(MF));
auto EmitMI = [&](const RegPairInfo &RPI) {
unsigned Reg1 = RPI.Reg1;
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index fea181a..5421276 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -64,8 +64,9 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_AArch64_AAPCS_SwiftError_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::PreserveMost)
return CSR_AArch64_RT_MostRegs_SaveList;
- else
- return CSR_AArch64_AAPCS_SaveList;
+ if (MF->getSubtarget<AArch64Subtarget>().isTargetDarwin())
+ return CSR_Darwin_AArch64_AAPCS_SaveList;
+ return CSR_AArch64_AAPCS_SaveList;
}
const MCPhysReg *AArch64RegisterInfo::getCalleeSavedRegsViaCopy(