diff options
Diffstat (limited to 'llvm/lib/Target/AArch64')
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp | 10 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 33 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 100 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.h | 5 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.td | 5 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp | 12 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp | 88 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 12 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64Subtarget.h | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64TargetMachine.cpp | 1 |
11 files changed, 220 insertions, 50 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp index cb831963..7712d2a 100644 --- a/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp +++ b/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp @@ -629,8 +629,7 @@ void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) { } const MCInstrDesc &MCID = TII->get(Opc); // Create a dummy virtual register for the SUBS def. - Register DestReg = - MRI->createVirtualRegister(TII->getRegClass(MCID, 0, TRI)); + Register DestReg = MRI->createVirtualRegister(TII->getRegClass(MCID, 0)); // Insert a SUBS Rn, #0 instruction instead of the cbz / cbnz. BuildMI(*Head, Head->end(), TermDL, MCID) .addReg(DestReg, RegState::Define | RegState::Dead) @@ -638,8 +637,7 @@ void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) { .addImm(0) .addImm(0); // SUBS uses the GPR*sp register classes. - MRI->constrainRegClass(HeadCond[2].getReg(), - TII->getRegClass(MCID, 1, TRI)); + MRI->constrainRegClass(HeadCond[2].getReg(), TII->getRegClass(MCID, 1)); } Head->splice(Head->end(), CmpBB, CmpBB->begin(), CmpBB->end()); @@ -686,10 +684,10 @@ void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) { unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(CmpBBTailCC); const MCInstrDesc &MCID = TII->get(Opc); MRI->constrainRegClass(CmpMI->getOperand(FirstOp).getReg(), - TII->getRegClass(MCID, 0, TRI)); + TII->getRegClass(MCID, 0)); if (CmpMI->getOperand(FirstOp + 1).isReg()) MRI->constrainRegClass(CmpMI->getOperand(FirstOp + 1).getReg(), - TII->getRegClass(MCID, 1, TRI)); + TII->getRegClass(MCID, 1)); MachineInstrBuilder MIB = BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), MCID) .add(CmpMI->getOperand(FirstOp)); // Register Rn if (isZBranch) diff --git a/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp b/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp index 75361f5..4ff49a6 100644 --- a/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp +++ b/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp @@ -156,7 +156,7 @@ void AArch64DeadRegisterDefinitions::processMachineBasicBlock( LLVM_DEBUG(dbgs() << " Ignoring, def is tied operand.\n"); continue; } - const TargetRegisterClass *RC = TII->getRegClass(Desc, I, TRI); + const TargetRegisterClass *RC = TII->getRegClass(Desc, I); unsigned NewReg; if (RC == nullptr) { LLVM_DEBUG(dbgs() << " Ignoring, register is not a GPR.\n"); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 76a790dc..8457f61 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -22308,6 +22308,37 @@ static SDValue performExtBinopLoadFold(SDNode *N, SelectionDAG &DAG) { return DAG.getNode(N->getOpcode(), DL, VT, Ext0, NShift); } +// Attempt to combine the following patterns: +// SUB x, (CSET LO, (CMP a, b)) -> SBC x, 0, (CMP a, b) +// SUB (SUB x, y), (CSET LO, (CMP a, b)) -> SBC x, y, (CMP a, b) +// The CSET may be preceded by a ZEXT. +static SDValue performSubWithBorrowCombine(SDNode *N, SelectionDAG &DAG) { + if (N->getOpcode() != ISD::SUB) + return SDValue(); + + EVT VT = N->getValueType(0); + if (VT != MVT::i32 && VT != MVT::i64) + return SDValue(); + + SDValue N1 = N->getOperand(1); + if (N1.getOpcode() == ISD::ZERO_EXTEND && N1.hasOneUse()) + N1 = N1.getOperand(0); + if (!N1.hasOneUse() || getCSETCondCode(N1) != AArch64CC::LO) + return SDValue(); + + SDValue Flags = N1.getOperand(3); + if (Flags.getOpcode() != AArch64ISD::SUBS) + return SDValue(); + + SDLoc DL(N); + SDValue N0 = N->getOperand(0); + if (N0->getOpcode() == ISD::SUB) + return DAG.getNode(AArch64ISD::SBC, DL, VT, N0.getOperand(0), + N0.getOperand(1), Flags); + return DAG.getNode(AArch64ISD::SBC, DL, VT, N0, DAG.getConstant(0, DL, VT), + Flags); +} + static SDValue performAddSubCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { // Try to change sum of two reductions. @@ -22329,6 +22360,8 @@ static SDValue performAddSubCombine(SDNode *N, return Val; if (SDValue Val = performAddSubIntoVectorOp(N, DCI.DAG)) return Val; + if (SDValue Val = performSubWithBorrowCombine(N, DCI.DAG)) + return Val; if (SDValue Val = performExtBinopLoadFold(N, DCI.DAG)) return Val; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 4b40733..b93e562 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -91,7 +91,7 @@ static cl::opt<unsigned> GatherOptSearchLimit( "machine-combiner gather pattern optimization")); AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI) - : AArch64GenInstrInfo(STI, AArch64::ADJCALLSTACKDOWN, + : AArch64GenInstrInfo(STI, RI, AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP, AArch64::CATCHRET), RI(STI.getTargetTriple(), STI.getHwMode()), Subtarget(STI) {} @@ -1780,6 +1780,16 @@ static unsigned sForm(MachineInstr &Instr) { case AArch64::SUBSWri: case AArch64::SUBSXrr: case AArch64::SUBSXri: + case AArch64::ANDSWri: + case AArch64::ANDSWrr: + case AArch64::ANDSWrs: + case AArch64::ANDSXri: + case AArch64::ANDSXrr: + case AArch64::ANDSXrs: + case AArch64::BICSWrr: + case AArch64::BICSXrr: + case AArch64::BICSWrs: + case AArch64::BICSXrs: return Instr.getOpcode(); case AArch64::ADDWrr: @@ -1810,6 +1820,22 @@ static unsigned sForm(MachineInstr &Instr) { return AArch64::ANDSWri; case AArch64::ANDXri: return AArch64::ANDSXri; + case AArch64::ANDWrr: + return AArch64::ANDSWrr; + case AArch64::ANDWrs: + return AArch64::ANDSWrs; + case AArch64::ANDXrr: + return AArch64::ANDSXrr; + case AArch64::ANDXrs: + return AArch64::ANDSXrs; + case AArch64::BICWrr: + return AArch64::BICSWrr; + case AArch64::BICXrr: + return AArch64::BICSXrr; + case AArch64::BICWrs: + return AArch64::BICSWrs; + case AArch64::BICXrs: + return AArch64::BICSXrs; } } @@ -1947,6 +1973,25 @@ static bool isSUBSRegImm(unsigned Opcode) { return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri; } +static bool isANDOpcode(MachineInstr &MI) { + unsigned Opc = sForm(MI); + switch (Opc) { + case AArch64::ANDSWri: + case AArch64::ANDSWrr: + case AArch64::ANDSWrs: + case AArch64::ANDSXri: + case AArch64::ANDSXrr: + case AArch64::ANDSXrs: + case AArch64::BICSWrr: + case AArch64::BICSXrr: + case AArch64::BICSWrs: + case AArch64::BICSXrs: + return true; + default: + return false; + } +} + /// Check if CmpInstr can be substituted by MI. /// /// CmpInstr can be substituted: @@ -1984,7 +2029,8 @@ static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr, // 1) MI and CmpInstr set N and V to the same value. // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when // signed overflow occurs, so CmpInstr could still be simplified away. - if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap)) + // Note that Ands and Bics instructions always clear the V flag. + if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap) && !isANDOpcode(MI)) return false; AccessKind AccessToCheck = AK_Write; @@ -5618,7 +5664,6 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FI, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags) const { MachineFunction &MF = *MBB.getParent(); @@ -5632,7 +5677,7 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, bool Offset = true; MCRegister PNRReg = MCRegister::NoRegister; unsigned StackID = TargetStackID::Default; - switch (TRI->getSpillSize(*RC)) { + switch (RI.getSpillSize(*RC)) { case 1: if (AArch64::FPR8RegClass.hasSubClassEq(RC)) Opc = AArch64::STRBui; @@ -5795,10 +5840,12 @@ static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI, .addMemOperand(MMO); } -void AArch64InstrInfo::loadRegFromStackSlot( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, - int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, - Register VReg, MachineInstr::MIFlag Flags) const { +void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + Register DestReg, int FI, + const TargetRegisterClass *RC, + Register VReg, + MachineInstr::MIFlag Flags) const { MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = MF.getFrameInfo(); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); @@ -5810,7 +5857,7 @@ void AArch64InstrInfo::loadRegFromStackSlot( bool Offset = true; unsigned StackID = TargetStackID::Default; Register PNRReg = MCRegister::NoRegister; - switch (TRI->getSpillSize(*RC)) { + switch (TRI.getSpillSize(*RC)) { case 1: if (AArch64::FPR8RegClass.hasSubClassEq(RC)) Opc = AArch64::LDRBui; @@ -6446,10 +6493,10 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( "Mismatched register size in non subreg COPY"); if (IsSpill) storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex, - getRegClass(SrcReg), &TRI, Register()); + getRegClass(SrcReg), Register()); else loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, - getRegClass(DstReg), &TRI, Register()); + getRegClass(DstReg), Register()); return &*--InsertPt; } @@ -6467,8 +6514,7 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( assert(SrcMO.getSubReg() == 0 && "Unexpected subreg on physical register"); storeRegToStackSlot(MBB, InsertPt, AArch64::XZR, SrcMO.isKill(), - FrameIndex, &AArch64::GPR64RegClass, &TRI, - Register()); + FrameIndex, &AArch64::GPR64RegClass, Register()); return &*--InsertPt; } @@ -6502,7 +6548,7 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) == TRI.getRegSizeInBits(*FillRC) && "Mismatched regclass size on folded subreg COPY"); - loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI, + loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, Register()); MachineInstr &LoadMI = *--InsertPt; MachineOperand &LoadDst = LoadMI.getOperand(0); @@ -9590,6 +9636,27 @@ AArch64InstrInfo::getOutliningCandidateInfo( unsigned NumBytesToCreateFrame = 0; + // Avoid splitting ADRP ADD/LDR pair into outlined functions. + // These instructions are fused together by the scheduler. + // Any candidate where ADRP is the last instruction should be rejected + // as that will lead to splitting ADRP pair. + MachineInstr &LastMI = RepeatedSequenceLocs[0].back(); + MachineInstr &FirstMI = RepeatedSequenceLocs[0].front(); + if (LastMI.getOpcode() == AArch64::ADRP && + (LastMI.getOperand(1).getTargetFlags() & AArch64II::MO_PAGE) != 0 && + (LastMI.getOperand(1).getTargetFlags() & AArch64II::MO_GOT) != 0) { + return std::nullopt; + } + + // Similarly any candidate where the first instruction is ADD/LDR with a + // page offset should be rejected to avoid ADRP splitting. + if ((FirstMI.getOpcode() == AArch64::ADDXri || + FirstMI.getOpcode() == AArch64::LDRXui) && + (FirstMI.getOperand(2).getTargetFlags() & AArch64II::MO_PAGEOFF) != 0 && + (FirstMI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) != 0) { + return std::nullopt; + } + // We only allow outlining for functions having exactly matching return // address signing attributes, i.e., all share the same value for the // attribute "sign-return-address" and all share the same type of key they @@ -10996,8 +11063,6 @@ static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum, MachineBasicBlock::iterator InsertTo) { MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo(); - const TargetRegisterInfo *TRI = - MBB.getParent()->getSubtarget().getRegisterInfo(); MachineInstr *NewMI = MBB.getParent()->CloneMachineInstr(MI); Register Result = 0; for (unsigned I = 0; I < NewMI->getNumOperands(); ++I) { @@ -11006,8 +11071,7 @@ static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum, MRI.getRegClass(NewMI->getOperand(0).getReg())); NewMI->getOperand(I).setReg(Result); } else if (I == ReplaceOprNum) { - MRI.constrainRegClass(ReplaceReg, - TII->getRegClass(NewMI->getDesc(), I, TRI)); + MRI.constrainRegClass(ReplaceReg, TII->getRegClass(NewMI->getDesc(), I)); NewMI->getOperand(I).setReg(ReplaceReg); } } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h index 179574a..979c9ac 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -353,14 +353,13 @@ public: void storeRegToStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, - bool isKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, + bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; void loadRegFromStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, + Register VReg, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; // This tells target independent code that it is okay to pass instructions diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 76f076a..b30e3d0 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -4444,6 +4444,11 @@ defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum", [(AArch64Prefetch timm:$Rt, (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>; +// PRFM falls back to PRFUM for negative or unaligned offsets (not a multiple +// of 8). +def : InstAlias<"prfm $Rt, [$Rn, $offset]", + (PRFUMi prfop:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; + //--- // (unscaled immediate, unprivileged) defm LDTRX : LoadUnprivileged<0b11, 0, 0b01, GPR64, "ldtr">; diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp index 04e76c7..d25db89 100644 --- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp +++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp @@ -595,17 +595,17 @@ bool AArch64MIPeepholeOpt::splitTwoPartImm( // Determine register classes for destinations and register operands const TargetRegisterClass *FirstInstrDstRC = - TII->getRegClass(TII->get(Opcode.first), 0, TRI); + TII->getRegClass(TII->get(Opcode.first), 0); const TargetRegisterClass *FirstInstrOperandRC = - TII->getRegClass(TII->get(Opcode.first), 1, TRI); + TII->getRegClass(TII->get(Opcode.first), 1); const TargetRegisterClass *SecondInstrDstRC = (Opcode.first == Opcode.second) ? FirstInstrDstRC - : TII->getRegClass(TII->get(Opcode.second), 0, TRI); + : TII->getRegClass(TII->get(Opcode.second), 0); const TargetRegisterClass *SecondInstrOperandRC = (Opcode.first == Opcode.second) ? FirstInstrOperandRC - : TII->getRegClass(TII->get(Opcode.second), 1, TRI); + : TII->getRegClass(TII->get(Opcode.second), 1); // Get old registers destinations and new register destinations Register DstReg = MI.getOperand(0).getReg(); @@ -784,14 +784,14 @@ bool AArch64MIPeepholeOpt::visitUBFMXri(MachineInstr &MI) { } const TargetRegisterClass *DstRC64 = - TII->getRegClass(TII->get(MI.getOpcode()), 0, TRI); + TII->getRegClass(TII->get(MI.getOpcode()), 0); const TargetRegisterClass *DstRC32 = TRI->getSubRegisterClass(DstRC64, AArch64::sub_32); assert(DstRC32 && "Destination register class of UBFMXri doesn't have a " "sub_32 subregister class"); const TargetRegisterClass *SrcRC64 = - TII->getRegClass(TII->get(MI.getOpcode()), 1, TRI); + TII->getRegClass(TII->get(MI.getOpcode()), 1); const TargetRegisterClass *SrcRC32 = TRI->getSubRegisterClass(SrcRC64, AArch64::sub_32); assert(SrcRC32 && "Source register class of UBFMXri doesn't have a sub_32 " diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index a5048b9..f3cf222 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -897,7 +897,7 @@ AArch64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, const MCInstrDesc &MCID = TII->get(AArch64::ADDXri); MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); Register BaseReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass); - MRI.constrainRegClass(BaseReg, TII->getRegClass(MCID, 0, this)); + MRI.constrainRegClass(BaseReg, TII->getRegClass(MCID, 0)); unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0); BuildMI(*MBB, Ins, DL, MCID, BaseReg) @@ -1123,24 +1123,85 @@ unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, } } -// FORM_TRANSPOSED_REG_TUPLE nodes are created to improve register allocation -// where a consecutive multi-vector tuple is constructed from the same indices -// of multiple strided loads. This may still result in unnecessary copies -// between the loads and the tuple. Here we try to return a hint to assign the -// contiguous ZPRMulReg starting at the same register as the first operand of -// the pseudo, which should be a subregister of the first strided load. +// We add regalloc hints for different cases: +// * Choosing a better destination operand for predicated SVE instructions +// where the inactive lanes are undef, by choosing a register that is not +// unique to the other operands of the instruction. // -// For example, if the first strided load has been assigned $z16_z20_z24_z28 -// and the operands of the pseudo are each accessing subregister zsub2, we -// should look through through Order to find a contiguous register which -// begins with $z24 (i.e. $z24_z25_z26_z27). +// * Improve register allocation for SME multi-vector instructions where we can +// benefit from the strided- and contiguous register multi-vector tuples. // +// Here FORM_TRANSPOSED_REG_TUPLE nodes are created to improve register +// allocation where a consecutive multi-vector tuple is constructed from the +// same indices of multiple strided loads. This may still result in +// unnecessary copies between the loads and the tuple. Here we try to return a +// hint to assign the contiguous ZPRMulReg starting at the same register as +// the first operand of the pseudo, which should be a subregister of the first +// strided load. +// +// For example, if the first strided load has been assigned $z16_z20_z24_z28 +// and the operands of the pseudo are each accessing subregister zsub2, we +// should look through through Order to find a contiguous register which +// begins with $z24 (i.e. $z24_z25_z26_z27). bool AArch64RegisterInfo::getRegAllocationHints( Register VirtReg, ArrayRef<MCPhysReg> Order, SmallVectorImpl<MCPhysReg> &Hints, const MachineFunction &MF, const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const { - auto &ST = MF.getSubtarget<AArch64Subtarget>(); + const AArch64InstrInfo *TII = + MF.getSubtarget<AArch64Subtarget>().getInstrInfo(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + + // For predicated SVE instructions where the inactive lanes are undef, + // pick a destination register that is not unique to avoid introducing + // a movprfx. + const TargetRegisterClass *RegRC = MRI.getRegClass(VirtReg); + if (AArch64::ZPRRegClass.hasSubClassEq(RegRC)) { + for (const MachineOperand &DefOp : MRI.def_operands(VirtReg)) { + const MachineInstr &Def = *DefOp.getParent(); + if (DefOp.isImplicit() || + (TII->get(Def.getOpcode()).TSFlags & AArch64::FalseLanesMask) != + AArch64::FalseLanesUndef) + continue; + + unsigned InstFlags = + TII->get(AArch64::getSVEPseudoMap(Def.getOpcode())).TSFlags; + + for (MCPhysReg R : Order) { + auto AddHintIfSuitable = [&](MCPhysReg R, const MachineOperand &MO) { + // R is a suitable register hint if there exists an operand for the + // instruction that is not yet allocated a register or if R matches + // one of the other source operands. + if (!VRM->hasPhys(MO.getReg()) || VRM->getPhys(MO.getReg()) == R) + Hints.push_back(R); + }; + + switch (InstFlags & AArch64::DestructiveInstTypeMask) { + default: + break; + case AArch64::DestructiveTernaryCommWithRev: + AddHintIfSuitable(R, Def.getOperand(2)); + AddHintIfSuitable(R, Def.getOperand(3)); + AddHintIfSuitable(R, Def.getOperand(4)); + break; + case AArch64::DestructiveBinaryComm: + case AArch64::DestructiveBinaryCommWithRev: + AddHintIfSuitable(R, Def.getOperand(2)); + AddHintIfSuitable(R, Def.getOperand(3)); + break; + case AArch64::DestructiveBinary: + case AArch64::DestructiveBinaryImm: + AddHintIfSuitable(R, Def.getOperand(2)); + break; + } + } + } + + if (Hints.size()) + return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, + MF, VRM); + } + if (!ST.hasSME() || !ST.isStreaming()) return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF, VRM); @@ -1153,8 +1214,7 @@ bool AArch64RegisterInfo::getRegAllocationHints( // FORM_TRANSPOSED_REG_TUPLE pseudo, we want to favour reducing copy // instructions over reducing the number of clobbered callee-save registers, // so we add the strided registers as a hint. - const MachineRegisterInfo &MRI = MF.getRegInfo(); - unsigned RegID = MRI.getRegClass(VirtReg)->getID(); + unsigned RegID = RegRC->getID(); if (RegID == AArch64::ZPR2StridedOrContiguousRegClassID || RegID == AArch64::ZPR4StridedOrContiguousRegClassID) { diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index e1f4386..65b6077 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -3597,6 +3597,18 @@ let Predicates = [HasSVE_or_SME] in { def : Pat<(sext (i32 (vector_extract nxv4i32:$vec, VectorIndexS:$index))), (SMOVvi32to64 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index)>; + + // Extracts of ``unsigned'' i8 or i16 elements lead to the zero-extend being + // transformed to an AND mask. The mask is redundant since UMOV already zeroes + // the high bits of the destination register. + def : Pat<(i32 (and (vector_extract nxv16i8:$vec, VectorIndexB:$index), 0xff)), + (UMOVvi8 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index)>; + def : Pat<(i32 (and (vector_extract nxv8i16:$vec, VectorIndexH:$index), 0xffff)), + (UMOVvi16 (v8i16 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexH:$index)>; + def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract nxv16i8:$vec, VectorIndexB:$index)))), (i64 0xff))), + (SUBREG_TO_REG (i64 0), (i32 (UMOVvi8 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index)), sub_32)>; + def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract nxv8i16:$vec, VectorIndexH:$index)))), (i64 0xffff))), + (SUBREG_TO_REG (i64 0), (i32 (UMOVvi16 (v8i16 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexH:$index)), sub_32)>; } // End HasNEON // Extract first element from vector. diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index 8974965..ab4004e 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -157,7 +157,7 @@ public: bool enableMachineScheduler() const override { return true; } bool enablePostRAScheduler() const override { return usePostRAScheduler(); } bool enableSubRegLiveness() const override { return EnableSubregLiveness; } - + bool enableTerminalRule() const override { return true; } bool enableMachinePipeliner() const override; bool useDFAforSMS() const override { return false; } diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index 068954f..0bf2b31 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -54,7 +54,6 @@ #include "llvm/Transforms/Vectorize/LoopIdiomVectorize.h" #include <memory> #include <optional> -#include <string> using namespace llvm; |
