diff options
Diffstat (limited to 'llvm/lib/Target/AArch64')
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 5 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 86 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp | 70 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 2 |
4 files changed, 83 insertions, 80 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 6965116..9926a4d 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -26196,9 +26196,10 @@ static SDValue performFlagSettingCombine(SDNode *N, return DCI.CombineTo(N, Res, SDValue(N, 1)); } - // Combine identical generic nodes into this node, re-using the result. + // Combine equivalent generic nodes into this node, re-using the result. if (SDNode *Generic = DCI.DAG.getNodeIfExists( - GenericOpcode, DCI.DAG.getVTList(VT), {LHS, RHS})) + GenericOpcode, DCI.DAG.getVTList(VT), {LHS, RHS}, + /*AllowCommute=*/true)) DCI.CombineTo(Generic, SDValue(N, 0)); return SDValue(); diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index b8761d97..30dfcf2b 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -5064,17 +5064,15 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, bool RenamableSrc) const { if (AArch64::GPR32spRegClass.contains(DestReg) && (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) { - const TargetRegisterInfo *TRI = &getRegisterInfo(); - if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) { // If either operand is WSP, expand to ADD #0. if (Subtarget.hasZeroCycleRegMoveGPR64() && !Subtarget.hasZeroCycleRegMoveGPR32()) { // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move. - MCRegister DestRegX = TRI->getMatchingSuperReg( - DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass); - MCRegister SrcRegX = TRI->getMatchingSuperReg( - SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass); + MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32, + &AArch64::GPR64spRegClass); + MCRegister SrcRegX = RI.getMatchingSuperReg(SrcReg, AArch64::sub_32, + &AArch64::GPR64spRegClass); // This instruction is reading and writing X registers. This may upset // the register scavenger and machine verifier, so we need to indicate // that we are reading an undefined value from SrcRegX, but a proper @@ -5097,14 +5095,14 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, } else if (Subtarget.hasZeroCycleRegMoveGPR64() && !Subtarget.hasZeroCycleRegMoveGPR32()) { // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move. - MCRegister DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32, - &AArch64::GPR64spRegClass); + MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32, + &AArch64::GPR64spRegClass); assert(DestRegX.isValid() && "Destination super-reg not valid"); MCRegister SrcRegX = SrcReg == AArch64::WZR ? AArch64::XZR - : TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32, - &AArch64::GPR64spRegClass); + : RI.getMatchingSuperReg(SrcReg, AArch64::sub_32, + &AArch64::GPR64spRegClass); assert(SrcRegX.isValid() && "Source super-reg not valid"); // This instruction is reading and writing X registers. This may upset // the register scavenger and machine verifier, so we need to indicate @@ -5334,11 +5332,10 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (Subtarget.hasZeroCycleRegMoveFPR128() && !Subtarget.hasZeroCycleRegMoveFPR64() && !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) { - const TargetRegisterInfo *TRI = &getRegisterInfo(); - MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::dsub, - &AArch64::FPR128RegClass); - MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::dsub, - &AArch64::FPR128RegClass); + MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::dsub, + &AArch64::FPR128RegClass); + MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::dsub, + &AArch64::FPR128RegClass); // This instruction is reading and writing Q registers. This may upset // the register scavenger and machine verifier, so we need to indicate // that we are reading an undefined value from SrcRegQ, but a proper @@ -5359,11 +5356,10 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (Subtarget.hasZeroCycleRegMoveFPR128() && !Subtarget.hasZeroCycleRegMoveFPR64() && !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) { - const TargetRegisterInfo *TRI = &getRegisterInfo(); - MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::ssub, - &AArch64::FPR128RegClass); - MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::ssub, - &AArch64::FPR128RegClass); + MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::ssub, + &AArch64::FPR128RegClass); + MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::ssub, + &AArch64::FPR128RegClass); // This instruction is reading and writing Q registers. This may upset // the register scavenger and machine verifier, so we need to indicate // that we are reading an undefined value from SrcRegQ, but a proper @@ -5374,11 +5370,10 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); } else if (Subtarget.hasZeroCycleRegMoveFPR64() && !Subtarget.hasZeroCycleRegMoveFPR32()) { - const TargetRegisterInfo *TRI = &getRegisterInfo(); - MCRegister DestRegD = TRI->getMatchingSuperReg(DestReg, AArch64::ssub, - &AArch64::FPR64RegClass); - MCRegister SrcRegD = TRI->getMatchingSuperReg(SrcReg, AArch64::ssub, - &AArch64::FPR64RegClass); + MCRegister DestRegD = RI.getMatchingSuperReg(DestReg, AArch64::ssub, + &AArch64::FPR64RegClass); + MCRegister SrcRegD = RI.getMatchingSuperReg(SrcReg, AArch64::ssub, + &AArch64::FPR64RegClass); // This instruction is reading and writing D registers. This may upset // the register scavenger and machine verifier, so we need to indicate // that we are reading an undefined value from SrcRegD, but a proper @@ -5398,11 +5393,10 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (Subtarget.hasZeroCycleRegMoveFPR128() && !Subtarget.hasZeroCycleRegMoveFPR64() && !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) { - const TargetRegisterInfo *TRI = &getRegisterInfo(); - MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::hsub, - &AArch64::FPR128RegClass); - MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::hsub, - &AArch64::FPR128RegClass); + MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::hsub, + &AArch64::FPR128RegClass); + MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::hsub, + &AArch64::FPR128RegClass); // This instruction is reading and writing Q registers. This may upset // the register scavenger and machine verifier, so we need to indicate // that we are reading an undefined value from SrcRegQ, but a proper @@ -5413,11 +5407,10 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); } else if (Subtarget.hasZeroCycleRegMoveFPR64() && !Subtarget.hasZeroCycleRegMoveFPR32()) { - const TargetRegisterInfo *TRI = &getRegisterInfo(); - MCRegister DestRegD = TRI->getMatchingSuperReg(DestReg, AArch64::hsub, - &AArch64::FPR64RegClass); - MCRegister SrcRegD = TRI->getMatchingSuperReg(SrcReg, AArch64::hsub, - &AArch64::FPR64RegClass); + MCRegister DestRegD = RI.getMatchingSuperReg(DestReg, AArch64::hsub, + &AArch64::FPR64RegClass); + MCRegister SrcRegD = RI.getMatchingSuperReg(SrcReg, AArch64::hsub, + &AArch64::FPR64RegClass); // This instruction is reading and writing D registers. This may upset // the register scavenger and machine verifier, so we need to indicate // that we are reading an undefined value from SrcRegD, but a proper @@ -5441,11 +5434,10 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (Subtarget.hasZeroCycleRegMoveFPR128() && !Subtarget.hasZeroCycleRegMoveFPR64() && !Subtarget.hasZeroCycleRegMoveFPR64() && Subtarget.isNeonAvailable()) { - const TargetRegisterInfo *TRI = &getRegisterInfo(); - MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::bsub, - &AArch64::FPR128RegClass); - MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::bsub, - &AArch64::FPR128RegClass); + MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::bsub, + &AArch64::FPR128RegClass); + MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::bsub, + &AArch64::FPR128RegClass); // This instruction is reading and writing Q registers. This may upset // the register scavenger and machine verifier, so we need to indicate // that we are reading an undefined value from SrcRegQ, but a proper @@ -5456,11 +5448,10 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); } else if (Subtarget.hasZeroCycleRegMoveFPR64() && !Subtarget.hasZeroCycleRegMoveFPR32()) { - const TargetRegisterInfo *TRI = &getRegisterInfo(); - MCRegister DestRegD = TRI->getMatchingSuperReg(DestReg, AArch64::bsub, - &AArch64::FPR64RegClass); - MCRegister SrcRegD = TRI->getMatchingSuperReg(SrcReg, AArch64::bsub, - &AArch64::FPR64RegClass); + MCRegister DestRegD = RI.getMatchingSuperReg(DestReg, AArch64::bsub, + &AArch64::FPR64RegClass); + MCRegister SrcRegD = RI.getMatchingSuperReg(SrcReg, AArch64::bsub, + &AArch64::FPR64RegClass); // This instruction is reading and writing D registers. This may upset // the register scavenger and machine verifier, so we need to indicate // that we are reading an undefined value from SrcRegD, but a proper @@ -5532,9 +5523,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, } #ifndef NDEBUG - const TargetRegisterInfo &TRI = getRegisterInfo(); - errs() << TRI.getRegAsmName(DestReg) << " = COPY " - << TRI.getRegAsmName(SrcReg) << "\n"; + errs() << RI.getRegAsmName(DestReg) << " = COPY " << RI.getRegAsmName(SrcReg) + << "\n"; #endif llvm_unreachable("unimplemented reg-to-reg copy"); } diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp index f110558..7e03b97 100644 --- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp +++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp @@ -1360,14 +1360,24 @@ void AArch64EpilogueEmitter::emitEpilogue() { } bool CombineSPBump = shouldCombineCSRLocalStackBump(NumBytes); - // Assume we can't combine the last pop with the sp restore. - bool CombineAfterCSRBump = false; + + unsigned ProloguePopSize = PrologueSaveSize; if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) { + // With CalleeSavesAboveFrameRecord ProloguePopSize is the amount of stack + // that needs to be popped until we reach the start of the SVE save area. + // The "FixedObject" stack occurs after the SVE area and must be popped + // later. + ProloguePopSize -= FixedObject; AfterCSRPopSize += FixedObject; - } else if (!CombineSPBump && PrologueSaveSize != 0) { + } + + // Assume we can't combine the last pop with the sp restore. + if (!CombineSPBump && ProloguePopSize != 0) { MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator()); while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION || - AArch64InstrInfo::isSEHInstruction(*Pop)) + AArch64InstrInfo::isSEHInstruction(*Pop) || + (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord && + isPartOfSVECalleeSaves(Pop))) Pop = std::prev(Pop); // Converting the last ldp to a post-index ldp is valid only if the last // ldp's offset is 0. @@ -1377,18 +1387,27 @@ void AArch64EpilogueEmitter::emitEpilogue() { // may clobber), convert it to a post-index ldp. if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) { convertCalleeSaveRestoreToSPPrePostIncDec( - Pop, DL, PrologueSaveSize, EmitCFI, MachineInstr::FrameDestroy, - PrologueSaveSize); + Pop, DL, ProloguePopSize, EmitCFI, MachineInstr::FrameDestroy, + ProloguePopSize); + } else if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) { + MachineBasicBlock::iterator AfterLastPop = std::next(Pop); + if (AArch64InstrInfo::isSEHInstruction(*AfterLastPop)) + ++AfterLastPop; + // If not, and CalleeSavesAboveFrameRecord is enabled, deallocate + // callee-save non-SVE registers to move the stack pointer to the start of + // the SVE area. + emitFrameOffset(MBB, AfterLastPop, DL, AArch64::SP, AArch64::SP, + StackOffset::getFixed(ProloguePopSize), TII, + MachineInstr::FrameDestroy, false, NeedsWinCFI, + &HasWinCFI); } else { - // If not, make sure to emit an add after the last ldp. + // Otherwise, make sure to emit an add after the last ldp. // We're doing this by transferring the size to be restored from the // adjustment *before* the CSR pops to the adjustment *after* the CSR // pops. - AfterCSRPopSize += PrologueSaveSize; - CombineAfterCSRBump = true; + AfterCSRPopSize += ProloguePopSize; } } - // Move past the restores of the callee-saved registers. // If we plan on combining the sp bump of the local stack size and the callee // save stack size, we might need to adjust the CSR save and restore offsets. @@ -1419,6 +1438,17 @@ void AArch64EpilogueEmitter::emitEpilogue() { --SEHEpilogueStartI; } + // Determine the ranges of SVE callee-saves. This is done before emitting any + // code at the end of the epilogue (for Swift async), which can get in the way + // of finding SVE callee-saves with CalleeSavesAboveFrameRecord. + auto [PPR, ZPR] = getSVEStackFrameSizes(); + auto [PPRRange, ZPRRange] = partitionSVECS( + MBB, + SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord + ? MBB.getFirstTerminator() + : FirstGPRRestoreI, + PPR.CalleeSavesSize, ZPR.CalleeSavesSize, /*IsEpilogue=*/true); + if (HasFP && AFI->hasSwiftAsyncContext()) emitSwiftAsyncContextFramePointer(EpilogueEndI, DL); @@ -1441,14 +1471,6 @@ void AArch64EpilogueEmitter::emitEpilogue() { NumBytes -= PrologueSaveSize; assert(NumBytes >= 0 && "Negative stack allocation size!?"); - auto [PPR, ZPR] = getSVEStackFrameSizes(); - auto [PPRRange, ZPRRange] = partitionSVECS( - MBB, - SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord - ? MBB.getFirstTerminator() - : FirstGPRRestoreI, - PPR.CalleeSavesSize, ZPR.CalleeSavesSize, /*IsEpilogue=*/true); - StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize; StackOffset SVEStackSize = SVECalleeSavesSize + PPR.LocalsSize + ZPR.LocalsSize; @@ -1467,16 +1489,6 @@ void AArch64EpilogueEmitter::emitEpilogue() { NeedsWinCFI, &HasWinCFI); } - // Deallocate callee-save non-SVE registers. - emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP, - StackOffset::getFixed(AFI->getCalleeSavedStackSize()), TII, - MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI); - - // Deallocate fixed objects. - emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP, - StackOffset::getFixed(FixedObject), TII, - MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI); - // Deallocate callee-save SVE registers. emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP, SVECalleeSavesSize, TII, MachineInstr::FrameDestroy, false, @@ -1619,7 +1631,7 @@ void AArch64EpilogueEmitter::emitEpilogue() { MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP, StackOffset::getFixed(AfterCSRPopSize), TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI, EmitCFI, - StackOffset::getFixed(CombineAfterCSRBump ? PrologueSaveSize : 0)); + StackOffset::getFixed(AfterCSRPopSize - ArgumentStackToRestore)); } } diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index 12ddf47..53b00e8 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -273,7 +273,7 @@ void AArch64Subtarget::initializeProperties(bool HasMinSize) { EpilogueVectorizationMinVF = 8; MaxInterleaveFactor = 4; ScatterOverhead = 13; - LLVM_FALLTHROUGH; + [[fallthrough]]; case NeoverseN2: case NeoverseN3: PrefFunctionAlignment = Align(16); |