diff options
Diffstat (limited to 'llvm/lib/Target/AArch64')
8 files changed, 402 insertions, 410 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h index 91e64e6..bd0a17d 100644 --- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -315,6 +315,8 @@ public: } void setStackSizeSVE(uint64_t ZPR, uint64_t PPR) { + assert(isAligned(Align(16), ZPR) && isAligned(Align(16), PPR) && + "expected SVE stack sizes to be aligned to 16-bytes"); StackSizeZPR = ZPR; StackSizePPR = PPR; HasCalculatedStackSizeSVE = true; @@ -425,6 +427,8 @@ public: // Saves the CalleeSavedStackSize for SVE vectors in 'scalable bytes' void setSVECalleeSavedStackSize(unsigned ZPR, unsigned PPR) { + assert(isAligned(Align(16), ZPR) && isAligned(Align(16), PPR) && + "expected SVE callee-save sizes to be aligned to 16-bytes"); ZPRCalleeSavedStackSize = ZPR; PPRCalleeSavedStackSize = PPR; HasSVECalleeSavedStackSize = true; diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp index 1568161..f110558 100644 --- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp +++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp @@ -60,7 +60,6 @@ static bool isPartOfZPRCalleeSaves(MachineBasicBlock::iterator I) { case AArch64::PTRUE_C_B: return I->getFlag(MachineInstr::FrameSetup) || I->getFlag(MachineInstr::FrameDestroy); - case AArch64::SEH_SavePReg: case AArch64::SEH_SaveZReg: return true; } @@ -75,6 +74,8 @@ static bool isPartOfPPRCalleeSaves(MachineBasicBlock::iterator I) { case AArch64::LDR_PXI: return I->getFlag(MachineInstr::FrameSetup) || I->getFlag(MachineInstr::FrameDestroy); + case AArch64::SEH_SavePReg: + return true; } } @@ -94,6 +95,26 @@ AArch64PrologueEpilogueCommon::AArch64PrologueEpilogueCommon( HasFP = AFL.hasFP(MF); NeedsWinCFI = AFL.needsWinCFI(MF); + + // Windows unwind can't represent the required stack adjustments if we have + // both SVE callee-saves and dynamic stack allocations, and 
the frame pointer + // is before the SVE spills. The allocation of the frame pointer must be the + // last instruction in the prologue so the unwinder can restore the stack + // pointer correctly. (And there isn't any unwind opcode for `addvl sp, x29, + // -17`.) + // + // Because of this, we do spills in the opposite order on Windows: first SVE, + // then GPRs. The main side-effect of this is that it makes accessing + // parameters passed on the stack more expensive. + // + // We could consider rearranging the spills for simpler cases. + if (Subtarget.isTargetWindows() && AFI->getSVECalleeSavedStackSize()) { + if (AFI->hasStackHazardSlotIndex()) + reportFatalUsageError("SME hazard padding is not supported on Windows"); + SVELayout = SVEStackLayout::CalleeSavesAboveFrameRecord; + } else if (AFI->hasSplitSVEObjects()) { + SVELayout = SVEStackLayout::Split; + } } MachineBasicBlock::iterator @@ -334,6 +355,55 @@ bool AArch64PrologueEpilogueCommon::shouldCombineCSRLocalStackBump( return true; } +SVEFrameSizes AArch64PrologueEpilogueCommon::getSVEStackFrameSizes() const { + StackOffset PPRCalleeSavesSize = + StackOffset::getScalable(AFI->getPPRCalleeSavedStackSize()); + StackOffset ZPRCalleeSavesSize = + StackOffset::getScalable(AFI->getZPRCalleeSavedStackSize()); + StackOffset PPRLocalsSize = AFL.getPPRStackSize(MF) - PPRCalleeSavesSize; + StackOffset ZPRLocalsSize = AFL.getZPRStackSize(MF) - ZPRCalleeSavesSize; + if (SVELayout == SVEStackLayout::Split) + return {{PPRCalleeSavesSize, PPRLocalsSize}, + {ZPRCalleeSavesSize, ZPRLocalsSize}}; + // For simplicity, attribute all locals to ZPRs when split SVE is disabled. 
+ return {{PPRCalleeSavesSize, StackOffset{}}, + {ZPRCalleeSavesSize, PPRLocalsSize + ZPRLocalsSize}}; +} + +struct SVEPartitions { + struct { + MachineBasicBlock::iterator Begin, End; + } PPR, ZPR; +}; + +static SVEPartitions partitionSVECS(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + StackOffset PPRCalleeSavesSize, + StackOffset ZPRCalleeSavesSize, + bool IsEpilogue) { + MachineBasicBlock::iterator PPRsI = MBBI; + MachineBasicBlock::iterator End = + IsEpilogue ? MBB.begin() : MBB.getFirstTerminator(); + auto AdjustI = [&](auto MBBI) { return IsEpilogue ? std::prev(MBBI) : MBBI; }; + // Process the SVE CS to find the starts/ends of the ZPR and PPR areas. + if (PPRCalleeSavesSize) { + PPRsI = AdjustI(PPRsI); + assert(isPartOfPPRCalleeSaves(*PPRsI) && "Unexpected instruction"); + while (PPRsI != End && isPartOfPPRCalleeSaves(AdjustI(PPRsI))) + IsEpilogue ? (--PPRsI) : (++PPRsI); + } + MachineBasicBlock::iterator ZPRsI = PPRsI; + if (ZPRCalleeSavesSize) { + ZPRsI = AdjustI(ZPRsI); + assert(isPartOfZPRCalleeSaves(*ZPRsI) && "Unexpected instruction"); + while (ZPRsI != End && isPartOfZPRCalleeSaves(AdjustI(ZPRsI))) + IsEpilogue ? (--ZPRsI) : (++ZPRsI); + } + if (IsEpilogue) + return {{PPRsI, MBBI}, {ZPRsI, PPRsI}}; + return {{MBBI, PPRsI}, {PPRsI, ZPRsI}}; +} + AArch64PrologueEmitter::AArch64PrologueEmitter(MachineFunction &MF, MachineBasicBlock &MBB, const AArch64FrameLowering &AFL) @@ -613,30 +683,12 @@ void AArch64PrologueEmitter::emitPrologue() { bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg()); unsigned FixedObject = AFL.getFixedObjectSize(MF, AFI, IsWin64, IsFunclet); - // Windows unwind can't represent the required stack adjustments if we have - // both SVE callee-saves and dynamic stack allocations, and the frame - // pointer is before the SVE spills. The allocation of the frame pointer - // must be the last instruction in the prologue so the unwinder can restore - // the stack pointer correctly. 
(And there isn't any unwind opcode for - // `addvl sp, x29, -17`.) - // - // Because of this, we do spills in the opposite order on Windows: first SVE, - // then GPRs. The main side-effect of this is that it makes accessing - // parameters passed on the stack more expensive. - // - // We could consider rearranging the spills for simpler cases. - bool FPAfterSVECalleeSaves = - Subtarget.isTargetWindows() && AFI->getSVECalleeSavedStackSize(); - - if (FPAfterSVECalleeSaves && AFI->hasStackHazardSlotIndex()) - reportFatalUsageError("SME hazard padding is not supported on Windows"); - auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject; // All of the remaining stack allocations are for locals. determineLocalsStackSize(NumBytes, PrologueSaveSize); MachineBasicBlock::iterator FirstGPRSaveI = PrologueBeginI; - if (FPAfterSVECalleeSaves) { + if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) { // If we're doing SVE saves first, we need to immediately allocate space // for fixed objects, then space for the SVE callee saves. 
// @@ -712,110 +764,66 @@ void AArch64PrologueEmitter::emitPrologue() { if (AFL.windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding)) emitWindowsStackProbe(AfterGPRSavesI, DL, NumBytes, RealignmentPadding); - StackOffset PPRCalleeSavesSize = - StackOffset::getScalable(AFI->getPPRCalleeSavedStackSize()); - StackOffset ZPRCalleeSavesSize = - StackOffset::getScalable(AFI->getZPRCalleeSavedStackSize()); - StackOffset SVECalleeSavesSize = PPRCalleeSavesSize + ZPRCalleeSavesSize; - StackOffset PPRLocalsSize = AFL.getPPRStackSize(MF) - PPRCalleeSavesSize; - StackOffset ZPRLocalsSize = AFL.getZPRStackSize(MF) - ZPRCalleeSavesSize; - - std::optional<MachineBasicBlock::iterator> ZPRCalleeSavesBegin, - ZPRCalleeSavesEnd, PPRCalleeSavesBegin, PPRCalleeSavesEnd; - + auto [PPR, ZPR] = getSVEStackFrameSizes(); + StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize; + StackOffset NonSVELocalsSize = StackOffset::getFixed(NumBytes); StackOffset CFAOffset = - StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes); + StackOffset::getFixed(MFI.getStackSize()) - NonSVELocalsSize; + MachineBasicBlock::iterator AfterSVESavesI = AfterGPRSavesI; - if (!FPAfterSVECalleeSaves) { - // Process the SVE callee-saves to find the starts/ends of the ZPR and PPR - // areas. - PPRCalleeSavesBegin = AfterGPRSavesI; - if (PPRCalleeSavesSize) { - LLVM_DEBUG(dbgs() << "PPRCalleeSavedStackSize = " - << PPRCalleeSavesSize.getScalable() << "\n"); - - assert(isPartOfPPRCalleeSaves(*PPRCalleeSavesBegin) && - "Unexpected instruction"); - while (isPartOfPPRCalleeSaves(AfterSVESavesI) && - AfterSVESavesI != MBB.getFirstTerminator()) - ++AfterSVESavesI; + // Allocate space for the callee saves and PPR locals (if any). 
+ if (SVELayout != SVEStackLayout::CalleeSavesAboveFrameRecord) { + auto [PPRRange, ZPRRange] = + partitionSVECS(MBB, AfterGPRSavesI, PPR.CalleeSavesSize, + ZPR.CalleeSavesSize, /*IsEpilogue=*/false); + AfterSVESavesI = ZPRRange.End; + if (EmitAsyncCFI) + emitCalleeSavedSVELocations(AfterSVESavesI); + + StackOffset AllocateBeforePPRs = SVECalleeSavesSize; + StackOffset AllocateAfterPPRs = PPR.LocalsSize; + if (SVELayout == SVEStackLayout::Split) { + AllocateBeforePPRs = PPR.CalleeSavesSize; + AllocateAfterPPRs = PPR.LocalsSize + ZPR.CalleeSavesSize; } - PPRCalleeSavesEnd = ZPRCalleeSavesBegin = AfterSVESavesI; - if (ZPRCalleeSavesSize) { - LLVM_DEBUG(dbgs() << "ZPRCalleeSavedStackSize = " - << ZPRCalleeSavesSize.getScalable() << "\n"); - assert(isPartOfZPRCalleeSaves(*ZPRCalleeSavesBegin) && - "Unexpected instruction"); - while (isPartOfZPRCalleeSaves(AfterSVESavesI) && - AfterSVESavesI != MBB.getFirstTerminator()) - ++AfterSVESavesI; - } - ZPRCalleeSavesEnd = AfterSVESavesI; - } - - if (EmitAsyncCFI) - emitCalleeSavedSVELocations(AfterSVESavesI); - - if (AFI->hasSplitSVEObjects()) { - assert(!FPAfterSVECalleeSaves && - "Cannot use FPAfterSVECalleeSaves with aarch64-split-sve-objects"); - assert(!AFL.canUseRedZone(MF) && - "Cannot use redzone with aarch64-split-sve-objects"); - // TODO: Handle HasWinCFI/NeedsWinCFI? - assert(!NeedsWinCFI && - "WinCFI with aarch64-split-sve-objects is not supported"); - - // Split ZPR and PPR allocation. 
- // Allocate PPR callee saves - allocateStackSpace(*PPRCalleeSavesBegin, 0, PPRCalleeSavesSize, + allocateStackSpace(PPRRange.Begin, 0, AllocateBeforePPRs, EmitAsyncCFI && !HasFP, CFAOffset, - MFI.hasVarSizedObjects() || ZPRCalleeSavesSize || - ZPRLocalsSize || PPRLocalsSize); - CFAOffset += PPRCalleeSavesSize; - - // Allocate PPR locals + ZPR callee saves - assert(PPRCalleeSavesEnd == ZPRCalleeSavesBegin && + MFI.hasVarSizedObjects() || AllocateAfterPPRs || + ZPR.LocalsSize || NonSVELocalsSize); + CFAOffset += AllocateBeforePPRs; + assert(PPRRange.End == ZPRRange.Begin && "Expected ZPR callee saves after PPR locals"); - allocateStackSpace(*PPRCalleeSavesEnd, RealignmentPadding, - PPRLocalsSize + ZPRCalleeSavesSize, - EmitAsyncCFI && !HasFP, CFAOffset, - MFI.hasVarSizedObjects() || ZPRLocalsSize); - CFAOffset += PPRLocalsSize + ZPRCalleeSavesSize; - - // Allocate ZPR locals - allocateStackSpace(*ZPRCalleeSavesEnd, RealignmentPadding, - ZPRLocalsSize + StackOffset::getFixed(NumBytes), + allocateStackSpace(PPRRange.End, RealignmentPadding, AllocateAfterPPRs, EmitAsyncCFI && !HasFP, CFAOffset, - MFI.hasVarSizedObjects()); + MFI.hasVarSizedObjects() || ZPR.LocalsSize || + NonSVELocalsSize); + CFAOffset += AllocateAfterPPRs; } else { - // Allocate space for the callee saves (if any). - StackOffset LocalsSize = - PPRLocalsSize + ZPRLocalsSize + StackOffset::getFixed(NumBytes); - if (!FPAfterSVECalleeSaves) - allocateStackSpace(AfterGPRSavesI, 0, SVECalleeSavesSize, - EmitAsyncCFI && !HasFP, CFAOffset, - MFI.hasVarSizedObjects() || LocalsSize); + assert(SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord); + // Note: With CalleeSavesAboveFrameRecord, the SVE CS have already been + // allocated (and separate PPR locals are not supported, all SVE locals, + // both PPR and ZPR, are within the ZPR locals area). + assert(!PPR.LocalsSize && "Unexpected PPR locals!"); CFAOffset += SVECalleeSavesSize; + } - // Allocate space for the rest of the frame including SVE locals. 
Align the - // stack as necessary. - assert(!(AFL.canUseRedZone(MF) && NeedsRealignment) && - "Cannot use redzone with stack realignment"); - if (!AFL.canUseRedZone(MF)) { - // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have - // the correct value here, as NumBytes also includes padding bytes, - // which shouldn't be counted here. - StackOffset SVELocalsSize = PPRLocalsSize + ZPRLocalsSize; - allocateStackSpace(AfterSVESavesI, RealignmentPadding, - SVELocalsSize + StackOffset::getFixed(NumBytes), - EmitAsyncCFI && !HasFP, CFAOffset, - MFI.hasVarSizedObjects()); - } + // Allocate space for the rest of the frame including ZPR locals. Align the + // stack as necessary. + assert(!(AFL.canUseRedZone(MF) && NeedsRealignment) && + "Cannot use redzone with stack realignment"); + if (!AFL.canUseRedZone(MF)) { + // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have the + // correct value here, as NumBytes also includes padding bytes, which + // shouldn't be counted here. + allocateStackSpace( + AfterSVESavesI, RealignmentPadding, ZPR.LocalsSize + NonSVELocalsSize, + EmitAsyncCFI && !HasFP, CFAOffset, MFI.hasVarSizedObjects()); } // If we need a base pointer, set it up here. It's whatever the value of the - // stack pointer is at this point. Any variable size objects will be allocated - // after this, so we can still use the base pointer to reference locals. + // stack pointer is at this point. Any variable size objects will be + // allocated after this, so we can still use the base pointer to reference + // locals. // // FIXME: Clarify FrameSetup flags here. 
// Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is @@ -1270,7 +1278,9 @@ void AArch64PrologueEmitter::emitCalleeSavedSVELocations( StackOffset::getScalable(MFI.getObjectOffset(FI)) - StackOffset::getFixed(AFI->getCalleeSavedStackSize(MFI)); - if (AFI->hasSplitSVEObjects() && + // The scalable vectors are below (lower address) the scalable predicates + // with split SVE objects, so we must subtract the size of the predicates. + if (SVELayout == SVEStackLayout::Split && MFI.getStackID(FI) == TargetStackID::ScalableVector) Offset -= PPRStackSize; @@ -1349,13 +1359,10 @@ void AArch64EpilogueEmitter::emitEpilogue() { return; } - bool FPAfterSVECalleeSaves = - Subtarget.isTargetWindows() && AFI->getSVECalleeSavedStackSize(); - bool CombineSPBump = shouldCombineCSRLocalStackBump(NumBytes); // Assume we can't combine the last pop with the sp restore. bool CombineAfterCSRBump = false; - if (FPAfterSVECalleeSaves) { + if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) { AfterCSRPopSize += FixedObject; } else if (!CombineSPBump && PrologueSaveSize != 0) { MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator()); @@ -1390,7 +1397,8 @@ void AArch64EpilogueEmitter::emitEpilogue() { while (FirstGPRRestoreI != Begin) { --FirstGPRRestoreI; if (!FirstGPRRestoreI->getFlag(MachineInstr::FrameDestroy) || - (!FPAfterSVECalleeSaves && isPartOfSVECalleeSaves(FirstGPRRestoreI))) { + (SVELayout != SVEStackLayout::CalleeSavesAboveFrameRecord && + isPartOfSVECalleeSaves(FirstGPRRestoreI))) { ++FirstGPRRestoreI; break; } else if (CombineSPBump) @@ -1414,13 +1422,9 @@ void AArch64EpilogueEmitter::emitEpilogue() { if (HasFP && AFI->hasSwiftAsyncContext()) emitSwiftAsyncContextFramePointer(EpilogueEndI, DL); - StackOffset ZPRStackSize = AFL.getZPRStackSize(MF); - StackOffset PPRStackSize = AFL.getPPRStackSize(MF); - StackOffset SVEStackSize = ZPRStackSize + PPRStackSize; - // If there is a single SP update, insert it before the ret and we're done. 
if (CombineSPBump) { - assert(!SVEStackSize && "Cannot combine SP bump with SVE"); + assert(!AFI->hasSVEStackSize() && "Cannot combine SP bump with SVE"); // When we are about to restore the CSRs, the CFA register is SP again. if (EmitCFI && HasFP) @@ -1437,188 +1441,122 @@ void AArch64EpilogueEmitter::emitEpilogue() { NumBytes -= PrologueSaveSize; assert(NumBytes >= 0 && "Negative stack allocation size!?"); - if (!AFI->hasSplitSVEObjects()) { - // Process the SVE callee-saves to determine what space needs to be - // deallocated. - StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize; - MachineBasicBlock::iterator RestoreBegin = FirstGPRRestoreI, - RestoreEnd = FirstGPRRestoreI; - int64_t ZPRCalleeSavedSize = AFI->getZPRCalleeSavedStackSize(); - int64_t PPRCalleeSavedSize = AFI->getPPRCalleeSavedStackSize(); - int64_t SVECalleeSavedSize = ZPRCalleeSavedSize + PPRCalleeSavedSize; - - if (SVECalleeSavedSize) { - if (FPAfterSVECalleeSaves) - RestoreEnd = MBB.getFirstTerminator(); - - RestoreBegin = std::prev(RestoreEnd); - while (RestoreBegin != MBB.begin() && - isPartOfSVECalleeSaves(std::prev(RestoreBegin))) - --RestoreBegin; - - assert(isPartOfSVECalleeSaves(RestoreBegin) && - isPartOfSVECalleeSaves(std::prev(RestoreEnd)) && - "Unexpected instruction"); - - StackOffset CalleeSavedSizeAsOffset = - StackOffset::getScalable(SVECalleeSavedSize); - DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset; - DeallocateAfter = CalleeSavedSizeAsOffset; + auto [PPR, ZPR] = getSVEStackFrameSizes(); + auto [PPRRange, ZPRRange] = partitionSVECS( + MBB, + SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord + ? 
MBB.getFirstTerminator() + : FirstGPRRestoreI, + PPR.CalleeSavesSize, ZPR.CalleeSavesSize, /*IsEpilogue=*/true); + + StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize; + StackOffset SVEStackSize = + SVECalleeSavesSize + PPR.LocalsSize + ZPR.LocalsSize; + MachineBasicBlock::iterator RestoreBegin = ZPRRange.Begin; + MachineBasicBlock::iterator RestoreEnd = PPRRange.End; + + // Deallocate the SVE area. + if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) { + StackOffset SVELocalsSize = ZPR.LocalsSize + PPR.LocalsSize; + // If the callee-save area is before FP, restoring the FP implicitly + // deallocates non-callee-save SVE allocations. Otherwise, deallocate them + // explicitly. + if (!AFI->isStackRealigned() && !MFI.hasVarSizedObjects()) { + emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP, + SVELocalsSize, TII, MachineInstr::FrameDestroy, false, + NeedsWinCFI, &HasWinCFI); } - // Deallocate the SVE area. - if (FPAfterSVECalleeSaves) { - // If the callee-save area is before FP, restoring the FP implicitly - // deallocates non-callee-save SVE allocations. Otherwise, deallocate - // them explicitly. - if (!AFI->isStackRealigned() && !MFI.hasVarSizedObjects()) { - emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP, - DeallocateBefore, TII, MachineInstr::FrameDestroy, - false, NeedsWinCFI, &HasWinCFI); - } + // Deallocate callee-save non-SVE registers. + emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP, + StackOffset::getFixed(AFI->getCalleeSavedStackSize()), TII, + MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI); - // Deallocate callee-save non-SVE registers. - emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP, - StackOffset::getFixed(AFI->getCalleeSavedStackSize()), - TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, - &HasWinCFI); - - // Deallocate fixed objects. 
- emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP, - StackOffset::getFixed(FixedObject), TII, - MachineInstr::FrameDestroy, false, NeedsWinCFI, - &HasWinCFI); - - // Deallocate callee-save SVE registers. - emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP, - DeallocateAfter, TII, MachineInstr::FrameDestroy, false, - NeedsWinCFI, &HasWinCFI); - } else if (SVEStackSize) { - int64_t SVECalleeSavedSize = AFI->getSVECalleeSavedStackSize(); - // If we have stack realignment or variable-sized objects we must use the - // FP to restore SVE callee saves (as there is an unknown amount of - // data/padding between the SP and SVE CS area). - Register BaseForSVEDealloc = - (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) ? AArch64::FP - : AArch64::SP; - if (SVECalleeSavedSize && BaseForSVEDealloc == AArch64::FP) { - Register CalleeSaveBase = AArch64::FP; - if (int64_t CalleeSaveBaseOffset = - AFI->getCalleeSaveBaseToFrameRecordOffset()) { - // If we have have an non-zero offset to the non-SVE CS base we need - // to compute the base address by subtracting the offest in a - // temporary register first (to avoid briefly deallocating the SVE - // CS). - CalleeSaveBase = MBB.getParent()->getRegInfo().createVirtualRegister( - &AArch64::GPR64RegClass); - emitFrameOffset(MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP, - StackOffset::getFixed(-CalleeSaveBaseOffset), TII, - MachineInstr::FrameDestroy); - } - // The code below will deallocate the stack space space by moving the - // SP to the start of the SVE callee-save area. - emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase, - StackOffset::getScalable(-SVECalleeSavedSize), TII, + // Deallocate fixed objects. + emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP, + StackOffset::getFixed(FixedObject), TII, + MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI); + + // Deallocate callee-save SVE registers. 
+ emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP, + SVECalleeSavesSize, TII, MachineInstr::FrameDestroy, false, + NeedsWinCFI, &HasWinCFI); + } else if (AFI->hasSVEStackSize()) { + // If we have stack realignment or variable-sized objects we must use the FP + // to restore SVE callee saves (as there is an unknown amount of + // data/padding between the SP and SVE CS area). + Register BaseForSVEDealloc = + (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) ? AArch64::FP + : AArch64::SP; + if (SVECalleeSavesSize && BaseForSVEDealloc == AArch64::FP) { + // TODO: Support stack realignment and variable-sized objects. + assert( + SVELayout != SVEStackLayout::Split && + "unexpected stack realignment or variable sized objects with split " + "SVE stack objects"); + + Register CalleeSaveBase = AArch64::FP; + if (int64_t CalleeSaveBaseOffset = + AFI->getCalleeSaveBaseToFrameRecordOffset()) { + // If we have a non-zero offset to the non-SVE CS base we need to + // compute the base address by subtracting the offset in a temporary + // register first (to avoid briefly deallocating the SVE CS). + CalleeSaveBase = MBB.getParent()->getRegInfo().createVirtualRegister( + &AArch64::GPR64RegClass); + emitFrameOffset(MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP, + StackOffset::getFixed(-CalleeSaveBaseOffset), TII, MachineInstr::FrameDestroy); - } else if (BaseForSVEDealloc == AArch64::SP) { - if (SVECalleeSavedSize) { - // Deallocate the non-SVE locals first before we can deallocate (and - // restore callee saves) from the SVE area.
- emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP, - StackOffset::getFixed(NumBytes), TII, - MachineInstr::FrameDestroy, false, NeedsWinCFI, - &HasWinCFI, EmitCFI && !HasFP, - SVEStackSize + StackOffset::getFixed( - NumBytes + PrologueSaveSize)); - NumBytes = 0; - } - - emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP, - DeallocateBefore, TII, MachineInstr::FrameDestroy, - false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP, - SVEStackSize + - StackOffset::getFixed(NumBytes + PrologueSaveSize)); - - emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP, - DeallocateAfter, TII, MachineInstr::FrameDestroy, false, - NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP, - DeallocateAfter + - StackOffset::getFixed(NumBytes + PrologueSaveSize)); + } + // The code below will deallocate the stack space by moving the SP + // to the start of the SVE callee-save area. + emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase, + -SVECalleeSavesSize, TII, MachineInstr::FrameDestroy); + } else if (BaseForSVEDealloc == AArch64::SP) { + auto CFAOffset = + SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize); + + if (SVECalleeSavesSize) { + // Deallocate the non-SVE locals first before we can deallocate (and + // restore callee saves) from the SVE area.
Determine the sizes and starts/ends of the ZPR and PPR - // areas. - auto ZPRCalleeSavedSize = - StackOffset::getScalable(AFI->getZPRCalleeSavedStackSize()); - auto PPRCalleeSavedSize = - StackOffset::getScalable(AFI->getPPRCalleeSavedStackSize()); - StackOffset PPRLocalsSize = PPRStackSize - PPRCalleeSavedSize; - StackOffset ZPRLocalsSize = ZPRStackSize - ZPRCalleeSavedSize; - - MachineBasicBlock::iterator PPRRestoreBegin = FirstGPRRestoreI, - PPRRestoreEnd = FirstGPRRestoreI; - if (PPRCalleeSavedSize) { - PPRRestoreBegin = std::prev(PPRRestoreEnd); - while (PPRRestoreBegin != MBB.begin() && - isPartOfPPRCalleeSaves(std::prev(PPRRestoreBegin))) - --PPRRestoreBegin; - } - - MachineBasicBlock::iterator ZPRRestoreBegin = PPRRestoreBegin, - ZPRRestoreEnd = PPRRestoreBegin; - if (ZPRCalleeSavedSize) { - ZPRRestoreBegin = std::prev(ZPRRestoreEnd); - while (ZPRRestoreBegin != MBB.begin() && - isPartOfZPRCalleeSaves(std::prev(ZPRRestoreBegin))) - --ZPRRestoreBegin; - } - - auto CFAOffset = - SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize); - if (PPRCalleeSavedSize || ZPRCalleeSavedSize) { - // Deallocate the non-SVE locals first before we can deallocate (and - // restore callee saves) from the SVE area. 
- auto NonSVELocals = StackOffset::getFixed(NumBytes); - emitFrameOffset(MBB, ZPRRestoreBegin, DL, AArch64::SP, AArch64::SP, - NonSVELocals, TII, MachineInstr::FrameDestroy, false, - false, nullptr, EmitCFI && !HasFP, CFAOffset); - NumBytes = 0; - CFAOffset -= NonSVELocals; - } + if (ZPR.LocalsSize) { + emitFrameOffset(MBB, ZPRRange.Begin, DL, AArch64::SP, AArch64::SP, + ZPR.LocalsSize, TII, MachineInstr::FrameDestroy, false, + NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP, CFAOffset); + CFAOffset -= ZPR.LocalsSize; + } - if (ZPRLocalsSize) { - emitFrameOffset(MBB, ZPRRestoreBegin, DL, AArch64::SP, AArch64::SP, - ZPRLocalsSize, TII, MachineInstr::FrameDestroy, false, - false, nullptr, EmitCFI && !HasFP, CFAOffset); - CFAOffset -= ZPRLocalsSize; - } + StackOffset SVECalleeSavesToDealloc = SVECalleeSavesSize; + if (SVELayout == SVEStackLayout::Split && + (PPR.LocalsSize || ZPR.CalleeSavesSize)) { + assert(PPRRange.Begin == ZPRRange.End && + "Expected PPR restores after ZPR"); + emitFrameOffset(MBB, PPRRange.Begin, DL, AArch64::SP, AArch64::SP, + PPR.LocalsSize + ZPR.CalleeSavesSize, TII, + MachineInstr::FrameDestroy, false, NeedsWinCFI, + &HasWinCFI, EmitCFI && !HasFP, CFAOffset); + CFAOffset -= PPR.LocalsSize + ZPR.CalleeSavesSize; + SVECalleeSavesToDealloc -= ZPR.CalleeSavesSize; + } - if (PPRLocalsSize || ZPRCalleeSavedSize) { - assert(PPRRestoreBegin == ZPRRestoreEnd && - "Expected PPR restores after ZPR"); - emitFrameOffset(MBB, PPRRestoreBegin, DL, AArch64::SP, AArch64::SP, - PPRLocalsSize + ZPRCalleeSavedSize, TII, - MachineInstr::FrameDestroy, false, false, nullptr, - EmitCFI && !HasFP, CFAOffset); - CFAOffset -= PPRLocalsSize + ZPRCalleeSavedSize; - } - if (PPRCalleeSavedSize) { - emitFrameOffset(MBB, PPRRestoreEnd, DL, AArch64::SP, AArch64::SP, - PPRCalleeSavedSize, TII, MachineInstr::FrameDestroy, - false, false, nullptr, EmitCFI && !HasFP, CFAOffset); + // If split SVE is on, this dealloc PPRs, otherwise, deallocs ZPRs + PPRs: + if (SVECalleeSavesToDealloc) 
+ emitFrameOffset(MBB, PPRRange.End, DL, AArch64::SP, AArch64::SP, + SVECalleeSavesToDealloc, TII, + MachineInstr::FrameDestroy, false, NeedsWinCFI, + &HasWinCFI, EmitCFI && !HasFP, CFAOffset); } - // We only emit CFI information for ZPRs so emit CFI after the ZPR restores. if (EmitCFI) - emitCalleeSavedSVERestores(ZPRRestoreEnd); + emitCalleeSavedSVERestores( + SVELayout == SVEStackLayout::Split ? ZPRRange.End : PPRRange.End); } if (!HasFP) { diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h index a1c9b34..bccadda 100644 --- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h +++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h @@ -27,11 +27,23 @@ class AArch64Subtarget; class AArch64FunctionInfo; class AArch64FrameLowering; +struct SVEFrameSizes { + struct { + StackOffset CalleeSavesSize, LocalsSize; + } PPR, ZPR; +}; + class AArch64PrologueEpilogueCommon { public: AArch64PrologueEpilogueCommon(MachineFunction &MF, MachineBasicBlock &MBB, const AArch64FrameLowering &AFL); + enum class SVEStackLayout { + Default, + Split, + CalleeSavesAboveFrameRecord, + }; + protected: bool requiresGetVGCall() const; @@ -53,6 +65,8 @@ protected: bool shouldCombineCSRLocalStackBump(uint64_t StackBumpBytes) const; + SVEFrameSizes getSVEStackFrameSizes() const; + MachineFunction &MF; MachineBasicBlock &MBB; @@ -68,6 +82,7 @@ protected: bool IsFunclet = false; // Note: Set in derived constructors. bool NeedsWinCFI = false; // Note: Can be changed in emitFramePointerSetup. bool HomPrologEpilog = false; // Note: Set in derived constructors. + SVEStackLayout SVELayout = SVEStackLayout::Default; // Note: "HasWinCFI" is mutable as it can change in any "emit" function. 
mutable bool HasWinCFI = false; diff --git a/llvm/lib/Target/AArch64/AArch64SystemOperands.td b/llvm/lib/Target/AArch64/AArch64SystemOperands.td index 65b752e..9438917 100644 --- a/llvm/lib/Target/AArch64/AArch64SystemOperands.td +++ b/llvm/lib/Target/AArch64/AArch64SystemOperands.td @@ -816,8 +816,8 @@ def : BTI<"jc", 0b110>; // TLBI (translation lookaside buffer invalidate) instruction options. //===----------------------------------------------------------------------===// -class TLBIEntry<string name, bits<3> op1, bits<4> crn, bits<4> crm, - bits<3> op2, bit needsreg> { +class TLBICommon<string name, bits<3> op1, bits<4> crn, bits<4> crm, + bits<3> op2, bit needsreg> { string Name = name; bits<14> Encoding; let Encoding{13-11} = op1; @@ -830,131 +830,150 @@ class TLBIEntry<string name, bits<3> op1, bits<4> crn, bits<4> crm, code RequiresStr = [{ { }] # !interleave(Requires # ExtraRequires, [{, }]) # [{ } }]; } -def TLBITable : GenericTable { - let FilterClass = "TLBIEntry"; - let CppTypeName = "TLBI"; - let Fields = ["Name", "Encoding", "NeedsReg", "RequiresStr"]; - - let PrimaryKey = ["Encoding"]; - let PrimaryKeyName = "lookupTLBIByEncoding"; +class TLBIEntry<string name, bits<3> op1, bits<4> crn, bits<4> crm, + bits<3> op2, bit needsreg> + : TLBICommon<name, op1, crn, crm, op2, needsreg>; + +class TLBIPEntry<string name, bits<3> op1, bits<4> crn, bits<4> crm, + bits<3> op2, bit needsreg> + : TLBICommon<name, op1, crn, crm, op2, needsreg>; + +multiclass TLBITableBase { + def NAME # Table : GenericTable { + let FilterClass = NAME # "Entry"; + let CppTypeName = NAME; + let Fields = ["Name", "Encoding", "NeedsReg", "RequiresStr"]; + let PrimaryKey = ["Encoding"]; + let PrimaryKeyName = "lookup" # NAME # "ByEncoding"; + } + def lookup # NAME # ByName : SearchIndex { + let Table = !cast<GenericTable>(NAME # "Table"); + let Key = ["Name"]; + } } -def lookupTLBIByName : SearchIndex { - let Table = TLBITable; - let Key = ["Name"]; -} +defm TLBI : TLBITableBase; +defm 
TLBIP : TLBITableBase; -multiclass TLBI<string name, bits<3> op1, bits<4> crn, bits<4> crm, +multiclass TLBI<string name, bit hasTLBIP, bits<3> op1, bits<4> crn, bits<4> crm, bits<3> op2, bit needsreg = 1> { def : TLBIEntry<name, op1, crn, crm, op2, needsreg>; def : TLBIEntry<!strconcat(name, "nXS"), op1, crn, crm, op2, needsreg> { let Encoding{7} = 1; let ExtraRequires = ["AArch64::FeatureXS"]; } + if !eq(hasTLBIP, true) then { + def : TLBIPEntry<name, op1, crn, crm, op2, needsreg>; + def : TLBIPEntry<!strconcat(name, "nXS"), op1, crn, crm, op2, needsreg> { + let Encoding{7} = 1; + let ExtraRequires = ["AArch64::FeatureXS"]; + } + } } -defm : TLBI<"IPAS2E1IS", 0b100, 0b1000, 0b0000, 0b001>; -defm : TLBI<"IPAS2LE1IS", 0b100, 0b1000, 0b0000, 0b101>; -defm : TLBI<"VMALLE1IS", 0b000, 0b1000, 0b0011, 0b000, 0>; -defm : TLBI<"ALLE2IS", 0b100, 0b1000, 0b0011, 0b000, 0>; -defm : TLBI<"ALLE3IS", 0b110, 0b1000, 0b0011, 0b000, 0>; -defm : TLBI<"VAE1IS", 0b000, 0b1000, 0b0011, 0b001>; -defm : TLBI<"VAE2IS", 0b100, 0b1000, 0b0011, 0b001>; -defm : TLBI<"VAE3IS", 0b110, 0b1000, 0b0011, 0b001>; -defm : TLBI<"ASIDE1IS", 0b000, 0b1000, 0b0011, 0b010>; -defm : TLBI<"VAAE1IS", 0b000, 0b1000, 0b0011, 0b011>; -defm : TLBI<"ALLE1IS", 0b100, 0b1000, 0b0011, 0b100, 0>; -defm : TLBI<"VALE1IS", 0b000, 0b1000, 0b0011, 0b101>; -defm : TLBI<"VALE2IS", 0b100, 0b1000, 0b0011, 0b101>; -defm : TLBI<"VALE3IS", 0b110, 0b1000, 0b0011, 0b101>; -defm : TLBI<"VMALLS12E1IS", 0b100, 0b1000, 0b0011, 0b110, 0>; -defm : TLBI<"VAALE1IS", 0b000, 0b1000, 0b0011, 0b111>; -defm : TLBI<"IPAS2E1", 0b100, 0b1000, 0b0100, 0b001>; -defm : TLBI<"IPAS2LE1", 0b100, 0b1000, 0b0100, 0b101>; -defm : TLBI<"VMALLE1", 0b000, 0b1000, 0b0111, 0b000, 0>; -defm : TLBI<"ALLE2", 0b100, 0b1000, 0b0111, 0b000, 0>; -defm : TLBI<"ALLE3", 0b110, 0b1000, 0b0111, 0b000, 0>; -defm : TLBI<"VAE1", 0b000, 0b1000, 0b0111, 0b001>; -defm : TLBI<"VAE2", 0b100, 0b1000, 0b0111, 0b001>; -defm : TLBI<"VAE3", 0b110, 0b1000, 0b0111, 0b001>; -defm : 
TLBI<"ASIDE1", 0b000, 0b1000, 0b0111, 0b010>; -defm : TLBI<"VAAE1", 0b000, 0b1000, 0b0111, 0b011>; -defm : TLBI<"ALLE1", 0b100, 0b1000, 0b0111, 0b100, 0>; -defm : TLBI<"VALE1", 0b000, 0b1000, 0b0111, 0b101>; -defm : TLBI<"VALE2", 0b100, 0b1000, 0b0111, 0b101>; -defm : TLBI<"VALE3", 0b110, 0b1000, 0b0111, 0b101>; -defm : TLBI<"VMALLS12E1", 0b100, 0b1000, 0b0111, 0b110, 0>; -defm : TLBI<"VAALE1", 0b000, 0b1000, 0b0111, 0b111>; +// hasTLBIP op1 CRn CRm op2 needsreg +defm : TLBI<"IPAS2E1IS", 1, 0b100, 0b1000, 0b0000, 0b001>; +defm : TLBI<"IPAS2LE1IS", 1, 0b100, 0b1000, 0b0000, 0b101>; +defm : TLBI<"VMALLE1IS", 0, 0b000, 0b1000, 0b0011, 0b000, 0>; +defm : TLBI<"ALLE2IS", 0, 0b100, 0b1000, 0b0011, 0b000, 0>; +defm : TLBI<"ALLE3IS", 0, 0b110, 0b1000, 0b0011, 0b000, 0>; +defm : TLBI<"VAE1IS", 1, 0b000, 0b1000, 0b0011, 0b001>; +defm : TLBI<"VAE2IS", 1, 0b100, 0b1000, 0b0011, 0b001>; +defm : TLBI<"VAE3IS", 1, 0b110, 0b1000, 0b0011, 0b001>; +defm : TLBI<"ASIDE1IS", 0, 0b000, 0b1000, 0b0011, 0b010>; +defm : TLBI<"VAAE1IS", 1, 0b000, 0b1000, 0b0011, 0b011>; +defm : TLBI<"ALLE1IS", 0, 0b100, 0b1000, 0b0011, 0b100, 0>; +defm : TLBI<"VALE1IS", 1, 0b000, 0b1000, 0b0011, 0b101>; +defm : TLBI<"VALE2IS", 1, 0b100, 0b1000, 0b0011, 0b101>; +defm : TLBI<"VALE3IS", 1, 0b110, 0b1000, 0b0011, 0b101>; +defm : TLBI<"VMALLS12E1IS", 0, 0b100, 0b1000, 0b0011, 0b110, 0>; +defm : TLBI<"VAALE1IS", 1, 0b000, 0b1000, 0b0011, 0b111>; +defm : TLBI<"IPAS2E1", 1, 0b100, 0b1000, 0b0100, 0b001>; +defm : TLBI<"IPAS2LE1", 1, 0b100, 0b1000, 0b0100, 0b101>; +defm : TLBI<"VMALLE1", 0, 0b000, 0b1000, 0b0111, 0b000, 0>; +defm : TLBI<"ALLE2", 0, 0b100, 0b1000, 0b0111, 0b000, 0>; +defm : TLBI<"ALLE3", 0, 0b110, 0b1000, 0b0111, 0b000, 0>; +defm : TLBI<"VAE1", 1, 0b000, 0b1000, 0b0111, 0b001>; +defm : TLBI<"VAE2", 1, 0b100, 0b1000, 0b0111, 0b001>; +defm : TLBI<"VAE3", 1, 0b110, 0b1000, 0b0111, 0b001>; +defm : TLBI<"ASIDE1", 0, 0b000, 0b1000, 0b0111, 0b010>; +defm : TLBI<"VAAE1", 1, 0b000, 0b1000, 0b0111, 0b011>; 
+defm : TLBI<"ALLE1", 0, 0b100, 0b1000, 0b0111, 0b100, 0>; +defm : TLBI<"VALE1", 1, 0b000, 0b1000, 0b0111, 0b101>; +defm : TLBI<"VALE2", 1, 0b100, 0b1000, 0b0111, 0b101>; +defm : TLBI<"VALE3", 1, 0b110, 0b1000, 0b0111, 0b101>; +defm : TLBI<"VMALLS12E1", 0, 0b100, 0b1000, 0b0111, 0b110, 0>; +defm : TLBI<"VAALE1", 1, 0b000, 0b1000, 0b0111, 0b111>; // Armv8.4-A Translation Lookaside Buffer Instructions (TLBI) let Requires = ["AArch64::FeatureTLB_RMI"] in { // Armv8.4-A Outer Sharable TLB Maintenance instructions: -// op1 CRn CRm op2 -defm : TLBI<"VMALLE1OS", 0b000, 0b1000, 0b0001, 0b000, 0>; -defm : TLBI<"VAE1OS", 0b000, 0b1000, 0b0001, 0b001>; -defm : TLBI<"ASIDE1OS", 0b000, 0b1000, 0b0001, 0b010>; -defm : TLBI<"VAAE1OS", 0b000, 0b1000, 0b0001, 0b011>; -defm : TLBI<"VALE1OS", 0b000, 0b1000, 0b0001, 0b101>; -defm : TLBI<"VAALE1OS", 0b000, 0b1000, 0b0001, 0b111>; -defm : TLBI<"IPAS2E1OS", 0b100, 0b1000, 0b0100, 0b000>; -defm : TLBI<"IPAS2LE1OS", 0b100, 0b1000, 0b0100, 0b100>; -defm : TLBI<"VAE2OS", 0b100, 0b1000, 0b0001, 0b001>; -defm : TLBI<"VALE2OS", 0b100, 0b1000, 0b0001, 0b101>; -defm : TLBI<"VMALLS12E1OS", 0b100, 0b1000, 0b0001, 0b110, 0>; -defm : TLBI<"VAE3OS", 0b110, 0b1000, 0b0001, 0b001>; -defm : TLBI<"VALE3OS", 0b110, 0b1000, 0b0001, 0b101>; -defm : TLBI<"ALLE2OS", 0b100, 0b1000, 0b0001, 0b000, 0>; -defm : TLBI<"ALLE1OS", 0b100, 0b1000, 0b0001, 0b100, 0>; -defm : TLBI<"ALLE3OS", 0b110, 0b1000, 0b0001, 0b000, 0>; +// hasTLBIP op1 CRn CRm op2 needsreg +defm : TLBI<"VMALLE1OS", 0, 0b000, 0b1000, 0b0001, 0b000, 0>; +defm : TLBI<"VAE1OS", 1, 0b000, 0b1000, 0b0001, 0b001>; +defm : TLBI<"ASIDE1OS", 0, 0b000, 0b1000, 0b0001, 0b010>; +defm : TLBI<"VAAE1OS", 1, 0b000, 0b1000, 0b0001, 0b011>; +defm : TLBI<"VALE1OS", 1, 0b000, 0b1000, 0b0001, 0b101>; +defm : TLBI<"VAALE1OS", 1, 0b000, 0b1000, 0b0001, 0b111>; +defm : TLBI<"IPAS2E1OS", 1, 0b100, 0b1000, 0b0100, 0b000>; +defm : TLBI<"IPAS2LE1OS", 1, 0b100, 0b1000, 0b0100, 0b100>; +defm : TLBI<"VAE2OS", 1, 0b100, 0b1000, 
0b0001, 0b001>; +defm : TLBI<"VALE2OS", 1, 0b100, 0b1000, 0b0001, 0b101>; +defm : TLBI<"VMALLS12E1OS", 0, 0b100, 0b1000, 0b0001, 0b110, 0>; +defm : TLBI<"VAE3OS", 1, 0b110, 0b1000, 0b0001, 0b001>; +defm : TLBI<"VALE3OS", 1, 0b110, 0b1000, 0b0001, 0b101>; +defm : TLBI<"ALLE2OS", 0, 0b100, 0b1000, 0b0001, 0b000, 0>; +defm : TLBI<"ALLE1OS", 0, 0b100, 0b1000, 0b0001, 0b100, 0>; +defm : TLBI<"ALLE3OS", 0, 0b110, 0b1000, 0b0001, 0b000, 0>; // Armv8.4-A TLB Range Maintenance instructions: -// op1 CRn CRm op2 -defm : TLBI<"RVAE1", 0b000, 0b1000, 0b0110, 0b001>; -defm : TLBI<"RVAAE1", 0b000, 0b1000, 0b0110, 0b011>; -defm : TLBI<"RVALE1", 0b000, 0b1000, 0b0110, 0b101>; -defm : TLBI<"RVAALE1", 0b000, 0b1000, 0b0110, 0b111>; -defm : TLBI<"RVAE1IS", 0b000, 0b1000, 0b0010, 0b001>; -defm : TLBI<"RVAAE1IS", 0b000, 0b1000, 0b0010, 0b011>; -defm : TLBI<"RVALE1IS", 0b000, 0b1000, 0b0010, 0b101>; -defm : TLBI<"RVAALE1IS", 0b000, 0b1000, 0b0010, 0b111>; -defm : TLBI<"RVAE1OS", 0b000, 0b1000, 0b0101, 0b001>; -defm : TLBI<"RVAAE1OS", 0b000, 0b1000, 0b0101, 0b011>; -defm : TLBI<"RVALE1OS", 0b000, 0b1000, 0b0101, 0b101>; -defm : TLBI<"RVAALE1OS", 0b000, 0b1000, 0b0101, 0b111>; -defm : TLBI<"RIPAS2E1IS", 0b100, 0b1000, 0b0000, 0b010>; -defm : TLBI<"RIPAS2LE1IS", 0b100, 0b1000, 0b0000, 0b110>; -defm : TLBI<"RIPAS2E1", 0b100, 0b1000, 0b0100, 0b010>; -defm : TLBI<"RIPAS2LE1", 0b100, 0b1000, 0b0100, 0b110>; -defm : TLBI<"RIPAS2E1OS", 0b100, 0b1000, 0b0100, 0b011>; -defm : TLBI<"RIPAS2LE1OS", 0b100, 0b1000, 0b0100, 0b111>; -defm : TLBI<"RVAE2", 0b100, 0b1000, 0b0110, 0b001>; -defm : TLBI<"RVALE2", 0b100, 0b1000, 0b0110, 0b101>; -defm : TLBI<"RVAE2IS", 0b100, 0b1000, 0b0010, 0b001>; -defm : TLBI<"RVALE2IS", 0b100, 0b1000, 0b0010, 0b101>; -defm : TLBI<"RVAE2OS", 0b100, 0b1000, 0b0101, 0b001>; -defm : TLBI<"RVALE2OS", 0b100, 0b1000, 0b0101, 0b101>; -defm : TLBI<"RVAE3", 0b110, 0b1000, 0b0110, 0b001>; -defm : TLBI<"RVALE3", 0b110, 0b1000, 0b0110, 0b101>; -defm : TLBI<"RVAE3IS", 0b110, 0b1000, 
0b0010, 0b001>; -defm : TLBI<"RVALE3IS", 0b110, 0b1000, 0b0010, 0b101>; -defm : TLBI<"RVAE3OS", 0b110, 0b1000, 0b0101, 0b001>; -defm : TLBI<"RVALE3OS", 0b110, 0b1000, 0b0101, 0b101>; +// hasTLBIP op1 CRn CRm op2 needsreg +defm : TLBI<"RVAE1", 1, 0b000, 0b1000, 0b0110, 0b001>; +defm : TLBI<"RVAAE1", 1, 0b000, 0b1000, 0b0110, 0b011>; +defm : TLBI<"RVALE1", 1, 0b000, 0b1000, 0b0110, 0b101>; +defm : TLBI<"RVAALE1", 1, 0b000, 0b1000, 0b0110, 0b111>; +defm : TLBI<"RVAE1IS", 1, 0b000, 0b1000, 0b0010, 0b001>; +defm : TLBI<"RVAAE1IS", 1, 0b000, 0b1000, 0b0010, 0b011>; +defm : TLBI<"RVALE1IS", 1, 0b000, 0b1000, 0b0010, 0b101>; +defm : TLBI<"RVAALE1IS", 1, 0b000, 0b1000, 0b0010, 0b111>; +defm : TLBI<"RVAE1OS", 1, 0b000, 0b1000, 0b0101, 0b001>; +defm : TLBI<"RVAAE1OS", 1, 0b000, 0b1000, 0b0101, 0b011>; +defm : TLBI<"RVALE1OS", 1, 0b000, 0b1000, 0b0101, 0b101>; +defm : TLBI<"RVAALE1OS", 1, 0b000, 0b1000, 0b0101, 0b111>; +defm : TLBI<"RIPAS2E1IS", 1, 0b100, 0b1000, 0b0000, 0b010>; +defm : TLBI<"RIPAS2LE1IS", 1, 0b100, 0b1000, 0b0000, 0b110>; +defm : TLBI<"RIPAS2E1", 1, 0b100, 0b1000, 0b0100, 0b010>; +defm : TLBI<"RIPAS2LE1", 1, 0b100, 0b1000, 0b0100, 0b110>; +defm : TLBI<"RIPAS2E1OS", 1, 0b100, 0b1000, 0b0100, 0b011>; +defm : TLBI<"RIPAS2LE1OS", 1, 0b100, 0b1000, 0b0100, 0b111>; +defm : TLBI<"RVAE2", 1, 0b100, 0b1000, 0b0110, 0b001>; +defm : TLBI<"RVALE2", 1, 0b100, 0b1000, 0b0110, 0b101>; +defm : TLBI<"RVAE2IS", 1, 0b100, 0b1000, 0b0010, 0b001>; +defm : TLBI<"RVALE2IS", 1, 0b100, 0b1000, 0b0010, 0b101>; +defm : TLBI<"RVAE2OS", 1, 0b100, 0b1000, 0b0101, 0b001>; +defm : TLBI<"RVALE2OS", 1, 0b100, 0b1000, 0b0101, 0b101>; +defm : TLBI<"RVAE3", 1, 0b110, 0b1000, 0b0110, 0b001>; +defm : TLBI<"RVALE3", 1, 0b110, 0b1000, 0b0110, 0b101>; +defm : TLBI<"RVAE3IS", 1, 0b110, 0b1000, 0b0010, 0b001>; +defm : TLBI<"RVALE3IS", 1, 0b110, 0b1000, 0b0010, 0b101>; +defm : TLBI<"RVAE3OS", 1, 0b110, 0b1000, 0b0101, 0b001>; +defm : TLBI<"RVALE3OS", 1, 0b110, 0b1000, 0b0101, 0b101>; } //FeatureTLB_RMI 
// Armv9-A Realm Management Extension TLBI Instructions let Requires = ["AArch64::FeatureRME"] in { -defm : TLBI<"RPAOS", 0b110, 0b1000, 0b0100, 0b011>; -defm : TLBI<"RPALOS", 0b110, 0b1000, 0b0100, 0b111>; -defm : TLBI<"PAALLOS", 0b110, 0b1000, 0b0001, 0b100, 0>; -defm : TLBI<"PAALL", 0b110, 0b1000, 0b0111, 0b100, 0>; +defm : TLBI<"RPAOS", 0, 0b110, 0b1000, 0b0100, 0b011>; +defm : TLBI<"RPALOS", 0, 0b110, 0b1000, 0b0100, 0b111>; +defm : TLBI<"PAALLOS", 0, 0b110, 0b1000, 0b0001, 0b100, 0>; +defm : TLBI<"PAALL", 0, 0b110, 0b1000, 0b0111, 0b100, 0>; } // Armv9.5-A TLBI VMALL for Dirty State let Requires = ["AArch64::FeatureTLBIW"] in { -// op1, CRn, CRm, op2, needsreg -defm : TLBI<"VMALLWS2E1", 0b100, 0b1000, 0b0110, 0b010, 0>; -defm : TLBI<"VMALLWS2E1IS", 0b100, 0b1000, 0b0010, 0b010, 0>; -defm : TLBI<"VMALLWS2E1OS", 0b100, 0b1000, 0b0101, 0b010, 0>; +// op1, CRn, CRm, op2, needsreg +defm : TLBI<"VMALLWS2E1", 0, 0b100, 0b1000, 0b0110, 0b010, 0>; +defm : TLBI<"VMALLWS2E1IS", 0, 0b100, 0b1000, 0b0010, 0b010, 0>; +defm : TLBI<"VMALLWS2E1OS", 0, 0b100, 0b1000, 0b0101, 0b010, 0>; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 3641e22..2c3870c 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -4020,23 +4020,23 @@ bool AArch64AsmParser::parseSyspAlias(StringRef Name, SMLoc NameLoc, if (HasnXSQualifier) { Op = Op.drop_back(3); } - const AArch64TLBI::TLBI *TLBIorig = AArch64TLBI::lookupTLBIByName(Op); - if (!TLBIorig) + const AArch64TLBIP::TLBIP *TLBIPorig = AArch64TLBIP::lookupTLBIPByName(Op); + if (!TLBIPorig) return TokError("invalid operand for TLBIP instruction"); - const AArch64TLBI::TLBI TLBI( - TLBIorig->Name, TLBIorig->Encoding | (HasnXSQualifier ? 
(1 << 7) : 0), - TLBIorig->NeedsReg, + const AArch64TLBIP::TLBIP TLBIP( + TLBIPorig->Name, TLBIPorig->Encoding | (HasnXSQualifier ? (1 << 7) : 0), + TLBIPorig->NeedsReg, HasnXSQualifier - ? TLBIorig->FeaturesRequired | FeatureBitset({AArch64::FeatureXS}) - : TLBIorig->FeaturesRequired); - if (!TLBI.haveFeatures(getSTI().getFeatureBits())) { + ? TLBIPorig->FeaturesRequired | FeatureBitset({AArch64::FeatureXS}) + : TLBIPorig->FeaturesRequired); + if (!TLBIP.haveFeatures(getSTI().getFeatureBits())) { std::string Name = - std::string(TLBI.Name) + (HasnXSQualifier ? "nXS" : ""); + std::string(TLBIP.Name) + (HasnXSQualifier ? "nXS" : ""); std::string Str("TLBIP " + Name + " requires: "); - setRequiredFeatureString(TLBI.getRequiredFeatures(), Str); + setRequiredFeatureString(TLBIP.getRequiredFeatures(), Str); return TokError(Str); } - createSysAlias(TLBI.Encoding, Operands, S); + createSysAlias(TLBIP.Encoding, Operands, S); } Lex(); // Eat operand. diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp index 2552ee3..35bd244 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp @@ -1066,12 +1066,13 @@ bool AArch64InstPrinter::printSyspAlias(const MCInst *MI, Encoding &= ~(1 << 7); } - const AArch64TLBI::TLBI *TLBI = AArch64TLBI::lookupTLBIByEncoding(Encoding); - if (!TLBI || !TLBI->haveFeatures(STI.getFeatureBits())) + const AArch64TLBIP::TLBIP *TLBIP = + AArch64TLBIP::lookupTLBIPByEncoding(Encoding); + if (!TLBIP || !TLBIP->haveFeatures(STI.getFeatureBits())) return false; Ins = "tlbip\t"; - Name = std::string(TLBI->Name); + Name = std::string(TLBIP->Name); if (CnVal == 9) Name += "nXS"; } else diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp index 7767028..d6cb0e8 100644 --- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp +++ 
b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp @@ -186,6 +186,13 @@ namespace llvm { } namespace llvm { +namespace AArch64TLBIP { +#define GET_TLBIPTable_IMPL +#include "AArch64GenSystemOperands.inc" +} // namespace AArch64TLBIP +} // namespace llvm + +namespace llvm { namespace AArch64SVCR { #define GET_SVCRsList_IMPL #include "AArch64GenSystemOperands.inc" diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h index a4ee963..fea33ef 100644 --- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -795,6 +795,14 @@ namespace AArch64TLBI { #include "AArch64GenSystemOperands.inc" } +namespace AArch64TLBIP { +struct TLBIP : SysAliasReg { + using SysAliasReg::SysAliasReg; +}; +#define GET_TLBIPTable_DECL +#include "AArch64GenSystemOperands.inc" +} // namespace AArch64TLBIP + namespace AArch64II { /// Target Operand Flag enum. enum TOF { |