diff options
author | Sander de Smalen <sander.desmalen@arm.com> | 2019-08-15 10:34:16 +0000 |
---|---|---|
committer | Sander de Smalen <sander.desmalen@arm.com> | 2019-08-15 10:34:16 +0000 |
commit | 643adb55769ecb12e0377dab60de50def15d2cea (patch) | |
tree | 6ba0efc4f1a82c02b203f8ad476331383b977704 /llvm/lib | |
parent | de1d6c822079b3e7565bb3e48865318bee51761c (diff) | |
download | llvm-643adb55769ecb12e0377dab60de50def15d2cea.zip llvm-643adb55769ecb12e0377dab60de50def15d2cea.tar.gz llvm-643adb55769ecb12e0377dab60de50def15d2cea.tar.bz2 |
[AArch64] Change location of frame-record within callee-save area.
This patch changes the location of the frame-record (FP, LR) to the
bottom of the callee-saved area. According to the AAPCS the location of
the frame-record within the stackframe is unspecified (section 5.2.3 The
Frame Pointer), so the compiler should be free to choose a different
location.
The reason for changing the location of the frame-record is to prepare
the frame for allocating an SVE area below the callee-saves. This way the
compiler can use the VL-scaled addressing modes to directly access SVE
objects from the frame-pointer.
: :
| stack | | stack |
| args | | args |
+-------+ +-------+
| x30 | | x19 |
| x29 | | x20 |
FP -> |- - - -| | x21 |
| x19 | ==> | x22 |
| x20 | |- - - -|
| x21 | | x30 |
| x22 | | x29 |
+-------+ +-------+ <- FP
|///////| |///////| // realignment gap
|- - - -| |- - - -|
|spills/| |spills/|
| locals| | locals|
SP -> +-------+ +-------+ <- SP
Things to point out:
- The algorithm to find a paired register should be prevented from
accidentally pairing some callee-saved register with LR that is not
FP, since they should always be paired together when the frame
has a frame-record.
- For Darwin platforms the location of the frame-record is unchanged,
since the unwind encoding does not allow for encoding this position
dynamically and other tools currently depend on the former layout.
Reviewers: efriedma, rovka, rengolin, thegameg, greened, t.p.northover
Reviewed By: efriedma
Differential Revision: https://reviews.llvm.org/D65653
llvm-svn: 368987
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64CallingConvention.td | 30 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 79 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp | 5 |
3 files changed, 88 insertions, 26 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td index 2049198..95d3e4d1 100644 --- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td +++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td @@ -317,6 +317,12 @@ def CC_AArch64_GHC : CallingConv<[ CCIfType<[i64], CCAssignToReg<[X19, X20, X21, X22, X23, X24, X25, X26, X27, X28]>> ]>; +// The order of the callee-saves in this file is important, because the +// FrameLowering code will use this order to determine the layout the +// callee-save area in the stack frame. As can be observed below, Darwin +// requires the frame-record (LR, FP) to be at the top the callee-save area, +// whereas for other platforms they are at the bottom. + // FIXME: LR is only callee-saved in the sense that *we* preserve it and are // presumably a callee to someone. External functions may not do so, but this // is currently safe since BL has LR as an implicit-def and what happens after a @@ -325,7 +331,13 @@ def CC_AArch64_GHC : CallingConv<[ // It would be better to model its preservation semantics properly (create a // vreg on entry, use it in RET & tail call generation; make that vreg def if we // end up saving LR as part of a call frame). Watch this space... -def CSR_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22, +def CSR_AArch64_AAPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24, + X25, X26, X27, X28, LR, FP, + D8, D9, D10, D11, + D12, D13, D14, D15)>; + +// Darwin puts the frame-record at the top of the callee-save area. +def CSR_Darwin_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22, X23, X24, X25, X26, X27, X28, D8, D9, D10, D11, D12, D13, D14, D15)>; @@ -333,21 +345,21 @@ def CSR_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22, // Win64 has unwinding codes for an (FP,LR) pair, save_fplr and save_fplr_x. // We put FP before LR, so that frame lowering logic generates (FP,LR) pairs, // and not (LR,FP) pairs. -def CSR_Win_AArch64_AAPCS : CalleeSavedRegs<(add FP, LR, X19, X20, X21, X22, - X23, X24, X25, X26, X27, X28, +def CSR_Win_AArch64_AAPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24, + X25, X26, X27, X28, FP, LR, D8, D9, D10, D11, D12, D13, D14, D15)>; // AArch64 PCS for vector functions (VPCS) // must (additionally) preserve full Q8-Q23 registers -def CSR_AArch64_AAVPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22, - X23, X24, X25, X26, X27, X28, +def CSR_AArch64_AAVPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24, + X25, X26, X27, X28, LR, FP, (sequence "Q%u", 8, 23))>; // Functions taking SVE arguments or returning an SVE type // must (additionally) preserve full Z8-Z23 and predicate registers P4-P15 -def CSR_AArch64_SVE_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22, - X23, X24, X25, X26, X27, X28, +def CSR_AArch64_SVE_AAPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24, + X25, X26, X27, X28, LR, FP, (sequence "Z%u", 8, 23), (sequence "P%u", 4, 15))>; @@ -362,7 +374,7 @@ def CSR_AArch64_SVE_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22, def CSR_AArch64_AAPCS_ThisReturn : CalleeSavedRegs<(add CSR_AArch64_AAPCS, X0)>; def CSR_AArch64_AAPCS_SwiftError - : CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X21)>; + : CalleeSavedRegs<(sub CSR_Darwin_AArch64_AAPCS, X21)>; // The function used by Darwin to obtain the address of a thread-local variable // guarantees more than a normal AAPCS function. x16 and x17 are used on the @@ -378,7 +390,7 @@ def CSR_AArch64_TLS_Darwin // fast path calls a function that follows CSR_AArch64_TLS_Darwin, // CSR_AArch64_CXX_TLS_Darwin should be a subset of CSR_AArch64_TLS_Darwin. def CSR_AArch64_CXX_TLS_Darwin - : CalleeSavedRegs<(add CSR_AArch64_AAPCS, + : CalleeSavedRegs<(add CSR_Darwin_AArch64_AAPCS, (sub (sequence "X%u", 1, 28), X15, X16, X17, X18), (sequence "D%u", 0, 31))>; diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index cc7cbbb..e9cd1e1 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -44,11 +44,15 @@ // | | // |-----------------------------------| // | | -// | prev_fp, prev_lr | +// | callee-saved gpr registers | <--. +// | | | On Darwin platforms these +// |- - - - - - - - - - - - - - - - - -| | callee saves are swapped, +// | | | (frame record first) +// | prev_fp, prev_lr | <--' // | (a.k.a. "frame record") | // |-----------------------------------| <- fp(=x29) // | | -// | other callee-saved registers | +// | callee-saved fp/simd/SVE regs | // | | // |-----------------------------------| // |.empty.space.to.make.part.below....| @@ -80,6 +84,20 @@ // * A frame pointer is definitely needed when there are local variables with // more-than-default alignment requirements. // +// For Darwin platforms the frame-record (fp, lr) is stored at the top of the +// callee-saved area, since the unwind encoding does not allow for encoding +// this dynamically and existing tools depend on this layout. For other +// platforms, the frame-record is stored at the bottom of the (gpr) callee-saved +// area to allow SVE stack objects (allocated directly below the callee-saves, +// if available) to be accessed directly from the framepointer. +// The SVE spill/fill instructions have VL-scaled addressing modes such +// as: +// ldr z8, [fp, #-7 mul vl] +// For SVE the size of the vector length (VL) is not known at compile-time, so +// '#-7 mul vl' is an offset that can only be evaluated at runtime. With this +// layout, we don't need to add an unscaled offset to the framepointer before +// accessing the SVE object in the frame. +// // In some cases when a base pointer is not strictly needed, it is generated // anyway when offsets from the frame pointer to access local variables become // so large that the offset can't be encoded in the immediate fields of loads @@ -793,6 +811,10 @@ static bool needsWinCFI(const MachineFunction &MF) { F.needsUnwindTableEntry(); } +static bool isTargetDarwin(const MachineFunction &MF) { + return MF.getSubtarget<AArch64Subtarget>().isTargetDarwin(); +} + void AArch64FrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.begin(); @@ -952,9 +974,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, } if (HasFP) { - // Only set up FP if we actually need to. Frame pointer is fp = - // sp - fixedobject - 16. - int FPOffset = AFI->getCalleeSavedStackSize() - 16; + // Only set up FP if we actually need to. + int FPOffset = isTargetDarwin(MF) ? (AFI->getCalleeSavedStackSize() - 16) : 0; + if (CombineSPBump) FPOffset += AFI->getLocalStackSize(); @@ -1136,7 +1158,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, if (needsFrameMoves) { const DataLayout &TD = MF.getDataLayout(); - const int StackGrowth = -TD.getPointerSize(0); + const int StackGrowth = isTargetDarwin(MF) + ? (2 * -TD.getPointerSize(0)) + : -AFI->getCalleeSavedStackSize(); Register FramePtr = RegInfo->getFrameRegister(MF); // An example of the prologue: // @@ -1208,7 +1232,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // Define the current CFA rule to use the provided FP. unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true); unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa( - nullptr, Reg, 2 * StackGrowth - FixedObject)); + nullptr, Reg, StackGrowth - FixedObject)); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); @@ -1462,11 +1486,13 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // FIXME: Rather than doing the math here, we should instead just use // non-post-indexed loads for the restores if we aren't actually going to // be able to save any instructions. - if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) + if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) { + int64_t OffsetToFrameRecord = + isTargetDarwin(MF) ? (-(int64_t)AFI->getCalleeSavedStackSize() + 16) : 0; emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP, - {-(int64_t)AFI->getCalleeSavedStackSize() + 16, MVT::i8}, + {OffsetToFrameRecord, MVT::i8}, TII, MachineInstr::FrameDestroy, false, NeedsWinCFI); - else if (NumBytes) + } else if (NumBytes) emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, {NumBytes, MVT::i8}, TII, MachineInstr::FrameDestroy, false, NeedsWinCFI); @@ -1526,7 +1552,8 @@ static StackOffset getFPOffset(const MachineFunction &MF, int ObjectOffset) { bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()); unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0; - return {ObjectOffset + FixedObject + 16, MVT::i8}; + unsigned FPAdjust = isTargetDarwin(MF) ? 16 : AFI->getCalleeSavedStackSize(); + return {ObjectOffset + FixedObject + FPAdjust, MVT::i8}; } static StackOffset getStackOffset(const MachineFunction &MF, int ObjectOffset) { @@ -1689,6 +1716,23 @@ static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2, return true; } +/// Returns true if Reg1 and Reg2 cannot be paired using a ldp/stp instruction. +/// WindowsCFI requires that only consecutive registers can be paired. +/// LR and FP need to be allocated together when the frame needs to save +/// the frame-record. This means any other register pairing with LR is invalid. +static bool invalidateRegisterPairing(unsigned Reg1, unsigned Reg2, + bool NeedsWinCFI, bool NeedsFrameRecord) { + if (NeedsWinCFI) + return invalidateWindowsRegisterPairing(Reg1, Reg2, true); + + // If we need to store the frame record, don't pair any register + // with LR other than FP. + if (NeedsFrameRecord) + return Reg2 == AArch64::LR; + + return false; +} + namespace { struct RegPairInfo { @@ -1708,7 +1752,7 @@ struct RegPairInfo { static void computeCalleeSaveRegisterPairs( MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI, const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs, - bool &NeedShadowCallStackProlog) { + bool &NeedShadowCallStackProlog, bool NeedsFrameRecord) { if (CSI.empty()) return; @@ -1750,7 +1794,8 @@ static void computeCalleeSaveRegisterPairs( switch (RPI.Type) { case RegPairInfo::GPR: if (AArch64::GPR64RegClass.contains(NextReg) && - !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI)) + !invalidateRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI, + NeedsFrameRecord)) RPI.Reg2 = NextReg; break; case RegPairInfo::FPR64: @@ -1784,6 +1829,10 @@ static void computeCalleeSaveRegisterPairs( (CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) && "Out of order callee saved regs!"); + assert((!RPI.isPaired() || RPI.Reg2 != AArch64::FP || + RPI.Reg1 == AArch64::LR) && + "FrameRecord must be allocated together with LR"); + // MachO's compact unwind format relies on all registers being stored in // adjacent register pairs. assert((!produceCompactUnwindFrame(MF) || @@ -1832,7 +1881,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( bool NeedShadowCallStackProlog = false; computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, - NeedShadowCallStackProlog); + NeedShadowCallStackProlog, hasFP(MF)); const MachineRegisterInfo &MRI = MF.getRegInfo(); if (NeedShadowCallStackProlog) { @@ -1962,7 +2011,7 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters( bool NeedShadowCallStackProlog = false; computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, - NeedShadowCallStackProlog); + NeedShadowCallStackProlog, hasFP(MF)); auto EmitMI = [&](const RegPairInfo &RPI) { unsigned Reg1 = RPI.Reg1; diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index fea181a..5421276 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -64,8 +64,9 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return CSR_AArch64_AAPCS_SwiftError_SaveList; if (MF->getFunction().getCallingConv() == CallingConv::PreserveMost) return CSR_AArch64_RT_MostRegs_SaveList; - else - return CSR_AArch64_AAPCS_SaveList; + if (MF->getSubtarget<AArch64Subtarget>().isTargetDarwin()) + return CSR_Darwin_AArch64_AAPCS_SaveList; + return CSR_AArch64_AAPCS_SaveList; } const MCPhysReg *AArch64RegisterInfo::getCalleeSavedRegsViaCopy( |