diff options
Diffstat (limited to 'llvm/lib/Target/AArch64')
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 357 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 64 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp | 3 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64RegisterInfo.td | 11 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 19 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64Subtarget.h | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 11 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/SMEInstrFormats.td | 14 |
8 files changed, 43 insertions, 438 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 4357264d..c76689f 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -345,12 +345,6 @@ static unsigned getStackHazardSize(const MachineFunction &MF) { return MF.getSubtarget<AArch64Subtarget>().getStreamingHazardSize(); } -/// Returns true if PPRs are spilled as ZPRs. -static bool arePPRsSpilledAsZPR(const MachineFunction &MF) { - return MF.getSubtarget().getRegisterInfo()->getSpillSize( - AArch64::PPRRegClass) == 16; -} - StackOffset AArch64FrameLowering::getZPRStackSize(const MachineFunction &MF) const { const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); @@ -1966,8 +1960,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( StrOpc = RPI.isPaired() ? AArch64::ST1B_2Z_IMM : AArch64::STR_ZXI; break; case RegPairInfo::PPR: - StrOpc = - Size == 16 ? AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO : AArch64::STR_PXI; + StrOpc = AArch64::STR_PXI; break; case RegPairInfo::VG: StrOpc = AArch64::STRXui; @@ -2178,8 +2171,7 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters( LdrOpc = RPI.isPaired() ? AArch64::LD1B_2Z_IMM : AArch64::LDR_ZXI; break; case RegPairInfo::PPR: - LdrOpc = Size == 16 ? AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO - : AArch64::LDR_PXI; + LdrOpc = AArch64::LDR_PXI; break; case RegPairInfo::VG: continue; @@ -2286,9 +2278,7 @@ static std::optional<int> getLdStFrameID(const MachineInstr &MI, // Returns true if the LDST MachineInstr \p MI is a PPR access. static bool isPPRAccess(const MachineInstr &MI) { - return MI.getOpcode() != AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO && - MI.getOpcode() != AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO && - AArch64::PPRRegClass.contains(MI.getOperand(0).getReg()); + return AArch64::PPRRegClass.contains(MI.getOperand(0).getReg()); } // Check if a Hazard slot is needed for the current function, and if so create @@ -2390,12 +2380,6 @@ void AArch64FrameLowering::determineStackHazardSlot( return; } - if (arePPRsSpilledAsZPR(MF)) { - LLVM_DEBUG(dbgs() << "SplitSVEObjects is not supported with " - "-aarch64-enable-zpr-predicate-spills"); - return; - } - // If another calling convention is explicitly set FPRs can't be promoted to // ZPR callee-saves. if (!is_contained({CallingConv::C, CallingConv::Fast, @@ -2519,14 +2503,6 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, continue; } - // Always save P4 when PPR spills are ZPR-sized and a predicate above p8 is - // spilled. If all of p0-p3 are used as return values p4 is must be free - // to reload p8-p15. - if (RegInfo->getSpillSize(AArch64::PPRRegClass) == 16 && - AArch64::PPR_p8to15RegClass.contains(Reg)) { - SavedRegs.set(AArch64::P4); - } - // MachO's compact unwind format relies on all registers being stored in // pairs. // FIXME: the usual format is actually better if unwinding isn't needed. @@ -2587,7 +2563,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, auto SpillSize = TRI->getSpillSize(*RC); bool IsZPR = AArch64::ZPRRegClass.contains(Reg); bool IsPPR = !IsZPR && AArch64::PPRRegClass.contains(Reg); - if (IsZPR || (IsPPR && arePPRsSpilledAsZPR(MF))) + if (IsZPR) ZPRCSStackSize += SpillSize; else if (IsPPR) PPRCSStackSize += SpillSize; @@ -2902,7 +2878,7 @@ static SVEStackSizes determineSVEStackSizes(MachineFunction &MF, StackTop += MFI.getObjectSize(FI); StackTop = alignTo(StackTop, Alignment); - assert(StackTop < std::numeric_limits<int64_t>::max() && + assert(StackTop < (uint64_t)std::numeric_limits<int64_t>::max() && "SVE StackTop far too large?!"); int64_t Offset = -int64_t(StackTop); @@ -2961,314 +2937,8 @@ static SVEStackSizes determineSVEStackSizes(MachineFunction &MF, return SVEStack; } -/// Attempts to scavenge a register from \p ScavengeableRegs given the used -/// registers in \p UsedRegs. -static Register tryScavengeRegister(LiveRegUnits const &UsedRegs, - BitVector const &ScavengeableRegs, - Register PreferredReg) { - if (PreferredReg != AArch64::NoRegister && UsedRegs.available(PreferredReg)) - return PreferredReg; - for (auto Reg : ScavengeableRegs.set_bits()) { - if (UsedRegs.available(Reg)) - return Reg; - } - return AArch64::NoRegister; -} - -/// Propagates frame-setup/destroy flags from \p SourceMI to all instructions in -/// \p MachineInstrs. -static void propagateFrameFlags(MachineInstr &SourceMI, - ArrayRef<MachineInstr *> MachineInstrs) { - for (MachineInstr *MI : MachineInstrs) { - if (SourceMI.getFlag(MachineInstr::FrameSetup)) - MI->setFlag(MachineInstr::FrameSetup); - if (SourceMI.getFlag(MachineInstr::FrameDestroy)) - MI->setFlag(MachineInstr::FrameDestroy); - } -} - -/// RAII helper class for scavenging or spilling a register. On construction -/// attempts to find a free register of class \p RC (given \p UsedRegs and \p -/// AllocatableRegs), if no register can be found spills \p SpillCandidate to \p -/// MaybeSpillFI to free a register. The free'd register is returned via the \p -/// FreeReg output parameter. On destruction, if there is a spill, its previous -/// value is reloaded. The spilling and scavenging is only valid at the -/// insertion point \p MBBI, this class should _not_ be used in places that -/// create or manipulate basic blocks, moving the expected insertion point. -struct ScopedScavengeOrSpill { - ScopedScavengeOrSpill(const ScopedScavengeOrSpill &) = delete; - ScopedScavengeOrSpill(ScopedScavengeOrSpill &&) = delete; - - ScopedScavengeOrSpill(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - Register SpillCandidate, const TargetRegisterClass &RC, - LiveRegUnits const &UsedRegs, - BitVector const &AllocatableRegs, - std::optional<int> *MaybeSpillFI, - Register PreferredReg = AArch64::NoRegister) - : MBB(MBB), MBBI(MBBI), RC(RC), TII(static_cast<const AArch64InstrInfo &>( - *MF.getSubtarget().getInstrInfo())), - TRI(*MF.getSubtarget().getRegisterInfo()) { - FreeReg = tryScavengeRegister(UsedRegs, AllocatableRegs, PreferredReg); - if (FreeReg != AArch64::NoRegister) - return; - assert(MaybeSpillFI && "Expected emergency spill slot FI information " - "(attempted to spill in prologue/epilogue?)"); - if (!MaybeSpillFI->has_value()) { - MachineFrameInfo &MFI = MF.getFrameInfo(); - *MaybeSpillFI = MFI.CreateSpillStackObject(TRI.getSpillSize(RC), - TRI.getSpillAlign(RC)); - } - FreeReg = SpillCandidate; - SpillFI = MaybeSpillFI->value(); - TII.storeRegToStackSlot(MBB, MBBI, FreeReg, false, *SpillFI, &RC, &TRI, - Register()); - } - - bool hasSpilled() const { return SpillFI.has_value(); } - - /// Returns the free register (found from scavenging or spilling a register). - Register freeRegister() const { return FreeReg; } - - Register operator*() const { return freeRegister(); } - - ~ScopedScavengeOrSpill() { - if (hasSpilled()) - TII.loadRegFromStackSlot(MBB, MBBI, FreeReg, *SpillFI, &RC, &TRI, - Register()); - } - -private: - MachineBasicBlock &MBB; - MachineBasicBlock::iterator MBBI; - const TargetRegisterClass &RC; - const AArch64InstrInfo &TII; - const TargetRegisterInfo &TRI; - Register FreeReg = AArch64::NoRegister; - std::optional<int> SpillFI; -}; - -/// Emergency stack slots for expanding SPILL_PPR_TO_ZPR_SLOT_PSEUDO and -/// FILL_PPR_FROM_ZPR_SLOT_PSEUDO. -struct EmergencyStackSlots { - std::optional<int> ZPRSpillFI; - std::optional<int> PPRSpillFI; - std::optional<int> GPRSpillFI; -}; - -/// Registers available for scavenging (ZPR, PPR3b, GPR). -struct ScavengeableRegs { - BitVector ZPRRegs; - BitVector PPR3bRegs; - BitVector GPRRegs; -}; - -static bool isInPrologueOrEpilogue(const MachineInstr &MI) { - return MI.getFlag(MachineInstr::FrameSetup) || - MI.getFlag(MachineInstr::FrameDestroy); -} - -/// Expands: -/// ``` -/// SPILL_PPR_TO_ZPR_SLOT_PSEUDO $p0, %stack.0, 0 -/// ``` -/// To: -/// ``` -/// $z0 = CPY_ZPzI_B $p0, 1, 0 -/// STR_ZXI $z0, $stack.0, 0 -/// ``` -/// While ensuring a ZPR ($z0 in this example) is free for the predicate ( -/// spilling if necessary). -static void expandSpillPPRToZPRSlotPseudo(MachineBasicBlock &MBB, - MachineInstr &MI, - const TargetRegisterInfo &TRI, - LiveRegUnits const &UsedRegs, - ScavengeableRegs const &SR, - EmergencyStackSlots &SpillSlots) { - MachineFunction &MF = *MBB.getParent(); - auto *TII = - static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo()); - - ScopedScavengeOrSpill ZPredReg( - MF, MBB, MI, AArch64::Z0, AArch64::ZPRRegClass, UsedRegs, SR.ZPRRegs, - isInPrologueOrEpilogue(MI) ? nullptr : &SpillSlots.ZPRSpillFI); - - SmallVector<MachineInstr *, 2> MachineInstrs; - const DebugLoc &DL = MI.getDebugLoc(); - MachineInstrs.push_back(BuildMI(MBB, MI, DL, TII->get(AArch64::CPY_ZPzI_B)) - .addReg(*ZPredReg, RegState::Define) - .add(MI.getOperand(0)) - .addImm(1) - .addImm(0) - .getInstr()); - MachineInstrs.push_back(BuildMI(MBB, MI, DL, TII->get(AArch64::STR_ZXI)) - .addReg(*ZPredReg) - .add(MI.getOperand(1)) - .addImm(MI.getOperand(2).getImm()) - .setMemRefs(MI.memoperands()) - .getInstr()); - propagateFrameFlags(MI, MachineInstrs); -} - -/// Expands: -/// ``` -/// $p0 = FILL_PPR_FROM_ZPR_SLOT_PSEUDO %stack.0, 0 -/// ``` -/// To: -/// ``` -/// $z0 = LDR_ZXI %stack.0, 0 -/// $p0 = PTRUE_B 31, implicit $vg -/// $p0 = CMPNE_PPzZI_B $p0, $z0, 0, implicit-def $nzcv, implicit-def $nzcv -/// ``` -/// While ensuring a ZPR ($z0 in this example) is free for the predicate ( -/// spilling if necessary). If the status flags are in use at the point of -/// expansion they are preserved (by moving them to/from a GPR). This may cause -/// an additional spill if no GPR is free at the expansion point. -static bool expandFillPPRFromZPRSlotPseudo( - MachineBasicBlock &MBB, MachineInstr &MI, const TargetRegisterInfo &TRI, - LiveRegUnits const &UsedRegs, ScavengeableRegs const &SR, - MachineInstr *&LastPTrue, EmergencyStackSlots &SpillSlots) { - MachineFunction &MF = *MBB.getParent(); - auto *TII = - static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo()); - - ScopedScavengeOrSpill ZPredReg( - MF, MBB, MI, AArch64::Z0, AArch64::ZPRRegClass, UsedRegs, SR.ZPRRegs, - isInPrologueOrEpilogue(MI) ? nullptr : &SpillSlots.ZPRSpillFI); - - ScopedScavengeOrSpill PredReg( - MF, MBB, MI, AArch64::P0, AArch64::PPR_3bRegClass, UsedRegs, SR.PPR3bRegs, - isInPrologueOrEpilogue(MI) ? nullptr : &SpillSlots.PPRSpillFI, - /*PreferredReg=*/ - LastPTrue ? LastPTrue->getOperand(0).getReg() : AArch64::NoRegister); - - // Elide NZCV spills if we know it is not used. - bool IsNZCVUsed = !UsedRegs.available(AArch64::NZCV); - std::optional<ScopedScavengeOrSpill> NZCVSaveReg; - if (IsNZCVUsed) - NZCVSaveReg.emplace( - MF, MBB, MI, AArch64::X0, AArch64::GPR64RegClass, UsedRegs, SR.GPRRegs, - isInPrologueOrEpilogue(MI) ? nullptr : &SpillSlots.GPRSpillFI); - SmallVector<MachineInstr *, 4> MachineInstrs; - const DebugLoc &DL = MI.getDebugLoc(); - MachineInstrs.push_back(BuildMI(MBB, MI, DL, TII->get(AArch64::LDR_ZXI)) - .addReg(*ZPredReg, RegState::Define) - .add(MI.getOperand(1)) - .addImm(MI.getOperand(2).getImm()) - .setMemRefs(MI.memoperands()) - .getInstr()); - if (IsNZCVUsed) - MachineInstrs.push_back( - BuildMI(MBB, MI, DL, TII->get(AArch64::MRS)) - .addReg(NZCVSaveReg->freeRegister(), RegState::Define) - .addImm(AArch64SysReg::NZCV) - .addReg(AArch64::NZCV, RegState::Implicit) - .getInstr()); - - // Reuse previous ptrue if we know it has not been clobbered. - if (LastPTrue) { - assert(*PredReg == LastPTrue->getOperand(0).getReg()); - LastPTrue->moveBefore(&MI); - } else { - LastPTrue = BuildMI(MBB, MI, DL, TII->get(AArch64::PTRUE_B)) - .addReg(*PredReg, RegState::Define) - .addImm(31); - } - MachineInstrs.push_back(LastPTrue); - MachineInstrs.push_back( - BuildMI(MBB, MI, DL, TII->get(AArch64::CMPNE_PPzZI_B)) - .addReg(MI.getOperand(0).getReg(), RegState::Define) - .addReg(*PredReg) - .addReg(*ZPredReg) - .addImm(0) - .addReg(AArch64::NZCV, RegState::ImplicitDefine) - .getInstr()); - if (IsNZCVUsed) - MachineInstrs.push_back(BuildMI(MBB, MI, DL, TII->get(AArch64::MSR)) - .addImm(AArch64SysReg::NZCV) - .addReg(NZCVSaveReg->freeRegister()) - .addReg(AArch64::NZCV, RegState::ImplicitDefine) - .getInstr()); - - propagateFrameFlags(MI, MachineInstrs); - return PredReg.hasSpilled(); -} - -/// Expands all FILL_PPR_FROM_ZPR_SLOT_PSEUDO and SPILL_PPR_TO_ZPR_SLOT_PSEUDO -/// operations within the MachineBasicBlock \p MBB. -static bool expandSMEPPRToZPRSpillPseudos(MachineBasicBlock &MBB, - const TargetRegisterInfo &TRI, - ScavengeableRegs const &SR, - EmergencyStackSlots &SpillSlots) { - LiveRegUnits UsedRegs(TRI); - UsedRegs.addLiveOuts(MBB); - bool HasPPRSpills = false; - MachineInstr *LastPTrue = nullptr; - for (MachineInstr &MI : make_early_inc_range(reverse(MBB))) { - UsedRegs.stepBackward(MI); - switch (MI.getOpcode()) { - case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO: - if (LastPTrue && - MI.definesRegister(LastPTrue->getOperand(0).getReg(), &TRI)) - LastPTrue = nullptr; - HasPPRSpills |= expandFillPPRFromZPRSlotPseudo(MBB, MI, TRI, UsedRegs, SR, - LastPTrue, SpillSlots); - MI.eraseFromParent(); - break; - case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO: - expandSpillPPRToZPRSlotPseudo(MBB, MI, TRI, UsedRegs, SR, SpillSlots); - MI.eraseFromParent(); - [[fallthrough]]; - default: - LastPTrue = nullptr; - break; - } - } - - return HasPPRSpills; -} - void AArch64FrameLowering::processFunctionBeforeFrameFinalized( MachineFunction &MF, RegScavenger *RS) const { - - AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); - const TargetSubtargetInfo &TSI = MF.getSubtarget(); - const TargetRegisterInfo &TRI = *TSI.getRegisterInfo(); - - // If predicates spills are 16-bytes we may need to expand - // SPILL_PPR_TO_ZPR_SLOT_PSEUDO/FILL_PPR_FROM_ZPR_SLOT_PSEUDO. - if (AFI->hasStackFrame() && TRI.getSpillSize(AArch64::PPRRegClass) == 16) { - auto ComputeScavengeableRegisters = [&](unsigned RegClassID) { - BitVector Regs = TRI.getAllocatableSet(MF, TRI.getRegClass(RegClassID)); - assert(Regs.count() > 0 && "Expected scavengeable registers"); - return Regs; - }; - - ScavengeableRegs SR{}; - SR.ZPRRegs = ComputeScavengeableRegisters(AArch64::ZPRRegClassID); - // Only p0-7 are possible as the second operand of cmpne (needed for fills). - SR.PPR3bRegs = ComputeScavengeableRegisters(AArch64::PPR_3bRegClassID); - SR.GPRRegs = ComputeScavengeableRegisters(AArch64::GPR64RegClassID); - - EmergencyStackSlots SpillSlots; - for (MachineBasicBlock &MBB : MF) { - // In the case we had to spill a predicate (in the range p0-p7) to reload - // a predicate (>= p8), additional spill/fill pseudos will be created. - // These need an additional expansion pass. Note: There will only be at - // most two expansion passes, as spilling/filling a predicate in the range - // p0-p7 never requires spilling another predicate. - for (int Pass = 0; Pass < 2; Pass++) { - bool HasPPRSpills = - expandSMEPPRToZPRSpillPseudos(MBB, TRI, SR, SpillSlots); - assert((Pass == 0 || !HasPPRSpills) && "Did not expect PPR spills"); - if (!HasPPRSpills) - break; - } - } - } - - MachineFrameInfo &MFI = MF.getFrameInfo(); - assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown && "Upwards growing stack unsupported"); @@ -3279,6 +2949,9 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized( if (!MF.hasEHFunclets()) return; + MachineFrameInfo &MFI = MF.getFrameInfo(); + auto *AFI = MF.getInfo<AArch64FunctionInfo>(); + // Win64 C++ EH needs to allocate space for the catch objects in the fixed // object area right next to the UnwindHelp object. WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo(); @@ -4280,18 +3953,10 @@ void AArch64FrameLowering::emitRemarks( } unsigned RegTy = StackAccess::AccessType::GPR; - if (MFI.hasScalableStackID(FrameIdx)) { - // SPILL_PPR_TO_ZPR_SLOT_PSEUDO and FILL_PPR_FROM_ZPR_SLOT_PSEUDO - // spill/fill the predicate as a data vector (so are an FPR access). - if (MI.getOpcode() != AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO && - MI.getOpcode() != AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO && - AArch64::PPRRegClass.contains(MI.getOperand(0).getReg())) { - RegTy = StackAccess::PPR; - } else - RegTy = StackAccess::FPR; - } else if (AArch64InstrInfo::isFpOrNEON(MI)) { + if (MFI.hasScalableStackID(FrameIdx)) + RegTy = isPPRAccess(MI) ? StackAccess::PPR : StackAccess::FPR; + else if (AArch64InstrInfo::isFpOrNEON(MI)) RegTy = StackAccess::FPR; - } StackAccesses[ArrIdx].AccessTypes |= RegTy; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 5a90da1..b8761d97 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -2579,8 +2579,6 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) { case AArch64::STZ2Gi: case AArch64::STZGi: case AArch64::TAGPstack: - case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO: - case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO: return 2; case AArch64::LD1B_D_IMM: case AArch64::LD1B_H_IMM: @@ -4387,8 +4385,6 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale, MinOffset = -256; MaxOffset = 254; break; - case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO: - case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO: case AArch64::LDR_ZXI: case AArch64::STR_ZXI: Scale = TypeSize::getScalable(16); @@ -5098,33 +5094,31 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg) .addImm(0) .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); + } else if (Subtarget.hasZeroCycleRegMoveGPR64() && + !Subtarget.hasZeroCycleRegMoveGPR32()) { + // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move. + MCRegister DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32, + &AArch64::GPR64spRegClass); + assert(DestRegX.isValid() && "Destination super-reg not valid"); + MCRegister SrcRegX = + SrcReg == AArch64::WZR + ? AArch64::XZR + : TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32, + &AArch64::GPR64spRegClass); + assert(SrcRegX.isValid() && "Source super-reg not valid"); + // This instruction is reading and writing X registers. This may upset + // the register scavenger and machine verifier, so we need to indicate + // that we are reading an undefined value from SrcRegX, but a proper + // value from SrcReg. + BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX) + .addReg(AArch64::XZR) + .addReg(SrcRegX, RegState::Undef) + .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); } else { - if (Subtarget.hasZeroCycleRegMoveGPR64() && - !Subtarget.hasZeroCycleRegMoveGPR32()) { - // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move. - MCRegister DestRegX = TRI->getMatchingSuperReg( - DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass); - assert(DestRegX.isValid() && "Destination super-reg not valid"); - MCRegister SrcRegX = - SrcReg == AArch64::WZR - ? AArch64::XZR - : TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32, - &AArch64::GPR64spRegClass); - assert(SrcRegX.isValid() && "Source super-reg not valid"); - // This instruction is reading and writing X registers. This may upset - // the register scavenger and machine verifier, so we need to indicate - // that we are reading an undefined value from SrcRegX, but a proper - // value from SrcReg. - BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX) - .addReg(AArch64::XZR) - .addReg(SrcRegX, RegState::Undef) - .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); - } else { - // Otherwise, expand to ORR WZR. - BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg) - .addReg(AArch64::WZR) - .addReg(SrcReg, getKillRegState(KillSrc)); - } + // Otherwise, expand to ORR WZR. + BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg) + .addReg(AArch64::WZR) + .addReg(SrcReg, getKillRegState(KillSrc)); } return; } @@ -5650,11 +5644,6 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, "Unexpected register store without SVE store instructions"); Opc = AArch64::STR_ZXI; StackID = TargetStackID::ScalableVector; - } else if (AArch64::PPRRegClass.hasSubClassEq(RC)) { - assert(Subtarget.isSVEorStreamingSVEAvailable() && - "Unexpected predicate store without SVE store instructions"); - Opc = AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO; - StackID = TargetStackID::ScalableVector; } break; case 24: @@ -5835,11 +5824,6 @@ void AArch64InstrInfo::loadRegFromStackSlot( "Unexpected register load without SVE load instructions"); Opc = AArch64::LDR_ZXI; StackID = TargetStackID::ScalableVector; - } else if (AArch64::PPRRegClass.hasSubClassEq(RC)) { - assert(Subtarget.isSVEorStreamingSVEAvailable() && - "Unexpected predicate load without SVE load instructions"); - Opc = AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO; - StackID = TargetStackID::ScalableVector; } break; case 24: diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp index aed137c..1568161 100644 --- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp +++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp @@ -57,10 +57,7 @@ static bool isPartOfZPRCalleeSaves(MachineBasicBlock::iterator I) { case AArch64::ST1B_2Z_IMM: case AArch64::STR_ZXI: case AArch64::LDR_ZXI: - case AArch64::CPY_ZPzI_B: - case AArch64::CMPNE_PPzZI_B: case AArch64::PTRUE_C_B: - case AArch64::PTRUE_B: return I->getFlag(MachineInstr::FrameSetup) || I->getFlag(MachineInstr::FrameDestroy); case AArch64::SEH_SavePReg: diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td index 5d89862..ef974df 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td @@ -980,19 +980,10 @@ class ZPRRegOp <string Suffix, AsmOperandClass C, ElementSizeEnum Size, //****************************************************************************** // SVE predicate register classes. - -// Note: This hardware mode is enabled in AArch64Subtarget::getHwModeSet() -// (without the use of the table-gen'd predicates). -def SMEWithZPRPredicateSpills : HwMode<[Predicate<"false">]>; - -def PPRSpillFillRI : RegInfoByHwMode< - [DefaultMode, SMEWithZPRPredicateSpills], - [RegInfo<16,16,16>, RegInfo<16,128,128>]>; - class PPRClass<int firstreg, int lastreg, int step = 1> : RegisterClass<"AArch64", [ nxv16i1, nxv8i1, nxv4i1, nxv2i1, nxv1i1 ], 16, (sequence "P%u", firstreg, lastreg, step)> { - let RegInfos = PPRSpillFillRI; + let Size = 16; } def PPR : PPRClass<0, 15> { diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index 98e0a11..12ddf47 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -86,11 +86,6 @@ static cl::alias AArch64StreamingStackHazardSize( cl::desc("alias for -aarch64-streaming-hazard-size"), cl::aliasopt(AArch64StreamingHazardSize)); -static cl::opt<bool> EnableZPRPredicateSpills( - "aarch64-enable-zpr-predicate-spills", cl::init(false), cl::Hidden, - cl::desc( - "Enables spilling/reloading SVE predicates as data vectors (ZPRs)")); - static cl::opt<unsigned> VScaleForTuningOpt("sve-vscale-for-tuning", cl::Hidden, cl::desc("Force a vscale for tuning factor for SVE")); @@ -426,20 +421,6 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU, EnableSubregLiveness = EnableSubregLivenessTracking.getValue(); } -unsigned AArch64Subtarget::getHwModeSet() const { - AArch64HwModeBits Modes = AArch64HwModeBits::DefaultMode; - - // Use a special hardware mode in streaming[-compatible] functions with - // aarch64-enable-zpr-predicate-spills. This changes the spill size (and - // alignment) for the predicate register class. - if (EnableZPRPredicateSpills.getValue() && - (isStreaming() || isStreamingCompatible())) { - Modes |= AArch64HwModeBits::SMEWithZPRPredicateSpills; - } - - return to_underlying(Modes); -} - const CallLowering *AArch64Subtarget::getCallLowering() const { return CallLoweringInfo.get(); } diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index 671df35..8974965 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -130,8 +130,6 @@ public: bool IsStreaming = false, bool IsStreamingCompatible = false, bool HasMinSize = false); - virtual unsigned getHwModeSet() const override; - // Getters for SubtargetFeatures defined in tablegen #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \ bool GETTER() const { return ATTRIBUTE; } diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 50a8754..479e345 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -5666,18 +5666,21 @@ InstructionCost AArch64TTIImpl::getPartialReductionCost( VectorType *AccumVectorType = VectorType::get(AccumType, VF.divideCoefficientBy(Ratio)); // We don't yet support all kinds of legalization. - auto TA = TLI->getTypeAction(AccumVectorType->getContext(), - EVT::getEVT(AccumVectorType)); - switch (TA) { + auto TC = TLI->getTypeConversion(AccumVectorType->getContext(), + EVT::getEVT(AccumVectorType)); + switch (TC.first) { default: return Invalid; case TargetLowering::TypeLegal: case TargetLowering::TypePromoteInteger: case TargetLowering::TypeSplitVector: + // The legalised type (e.g. after splitting) must be legal too. + if (TLI->getTypeAction(AccumVectorType->getContext(), TC.second) != + TargetLowering::TypeLegal) + return Invalid; break; } - // Check what kind of type-legalisation happens. std::pair<InstructionCost, MVT> AccumLT = getTypeLegalizationCost(AccumVectorType); std::pair<InstructionCost, MVT> InputLT = diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index be44b8f..33f35ad 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -58,20 +58,6 @@ def FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO : let hasSideEffects = 0; } -def SPILL_PPR_TO_ZPR_SLOT_PSEUDO : - Pseudo<(outs), (ins PPRorPNRAny:$Pt, GPR64sp:$Rn, simm9:$imm9), []>, Sched<[]> -{ - let mayStore = 1; - let hasSideEffects = 0; -} - -def FILL_PPR_FROM_ZPR_SLOT_PSEUDO : - Pseudo<(outs PPRorPNRAny:$Pt), (ins GPR64sp:$Rn, simm9:$imm9), []>, Sched<[]> -{ - let mayLoad = 1; - let hasSideEffects = 0; -} - def SDTZALoadStore : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisInt<2>]>; // SME ZA loads and stores def AArch64SMELdr : SDNode<"AArch64ISD::SME_ZA_LDR", SDTZALoadStore, |