diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIInstrInfo.h')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.h | 290 |
1 files changed, 183 insertions, 107 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index e1d7a07..0b54513 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -52,6 +52,11 @@ static const MachineMemOperand::Flags MOLastUse = static const MachineMemOperand::Flags MOCooperative = MachineMemOperand::MOTargetFlag3; +/// Mark the MMO of accesses to memory locations that are +/// never written to by other threads. +static const MachineMemOperand::Flags MOThreadPrivate = + MachineMemOperand::MOTargetFlag4; + /// Utility to store machine instructions worklist. struct SIInstrWorklist { SIInstrWorklist() = default; @@ -125,6 +130,11 @@ public: unsigned SubIdx, const TargetRegisterClass *SubRC) const; private: + bool optimizeSCC(MachineInstr *SCCValid, MachineInstr *SCCRedefine, + bool NeedInversion) const; + + bool invertSCCUse(MachineInstr *SCCDef) const; + void swapOperands(MachineInstr &Inst) const; std::pair<bool, MachineBasicBlock *> @@ -136,6 +146,8 @@ private: void lowerScalarAbs(SIInstrWorklist &Worklist, MachineInstr &Inst) const; + void lowerScalarAbsDiff(SIInstrWorklist &Worklist, MachineInstr &Inst) const; + void lowerScalarXnor(SIInstrWorklist &Worklist, MachineInstr &Inst) const; void splitScalarNotBinop(SIInstrWorklist &Worklist, MachineInstr &Inst, @@ -172,7 +184,7 @@ private: void addUsersToMoveToVALUWorklist(Register Reg, MachineRegisterInfo &MRI, SIInstrWorklist &Worklist) const; - void addSCCDefUsersToVALUWorklist(MachineOperand &Op, + void addSCCDefUsersToVALUWorklist(const MachineOperand &Op, MachineInstr &SCCDefInst, SIInstrWorklist &Worklist, Register NewCond = Register()) const; @@ -296,6 +308,8 @@ public: bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg, int64_t &ImmVal) const override; + std::optional<int64_t> getImmOrMaterializedImm(MachineOperand &Op) const; + unsigned getVectorRegSpillSaveOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, @@ -307,22 +321,20 @@ public: void storeRegToStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, - bool isKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, + bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; void loadRegFromStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, - int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, + int FrameIndex, const TargetRegisterClass *RC, Register VReg, + unsigned SubReg = 0, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; bool expandPostRAPseudo(MachineInstr &MI) const override; void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, - const MachineInstr &Orig, - const TargetRegisterInfo &TRI) const override; + const MachineInstr &Orig) const override; // Splits a V_MOV_B64_DPP_PSEUDO opcode into a pair of v_mov_b32_dpp // instructions. Returns a pair of generated instructions. @@ -426,6 +438,9 @@ public: void removeModOperands(MachineInstr &MI) const; + void mutateAndCleanupImplicit(MachineInstr &MI, + const MCInstrDesc &NewDesc) const; + /// Return the extracted immediate value in a subregister use from a constant /// materialized in a super register. /// @@ -451,7 +466,7 @@ public: return MI.getDesc().TSFlags & SIInstrFlags::SALU; } - bool isSALU(uint16_t Opcode) const { + bool isSALU(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::SALU; } @@ -459,7 +474,7 @@ public: return MI.getDesc().TSFlags & SIInstrFlags::VALU; } - bool isVALU(uint16_t Opcode) const { + bool isVALU(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::VALU; } @@ -467,7 +482,7 @@ public: return isMIMG(MI) || isVSAMPLE(MI) || isVIMAGE(MI); } - bool isImage(uint16_t Opcode) const { + bool isImage(uint32_t Opcode) const { return isMIMG(Opcode) || isVSAMPLE(Opcode) || isVIMAGE(Opcode); } @@ -475,7 +490,7 @@ public: return isMUBUF(MI) || isMTBUF(MI) || isImage(MI) || isFLAT(MI); } - bool isVMEM(uint16_t Opcode) const { + bool isVMEM(uint32_t Opcode) const { return isMUBUF(Opcode) || isMTBUF(Opcode) || isImage(Opcode); } @@ -483,7 +498,7 @@ public: return MI.getDesc().TSFlags & SIInstrFlags::SOP1; } - bool isSOP1(uint16_t Opcode) const { + bool isSOP1(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::SOP1; } @@ -491,7 +506,7 @@ public: return MI.getDesc().TSFlags & SIInstrFlags::SOP2; } - bool isSOP2(uint16_t Opcode) const { + bool isSOP2(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::SOP2; } @@ -499,7 +514,7 @@ public: return MI.getDesc().TSFlags & SIInstrFlags::SOPC; } - bool isSOPC(uint16_t Opcode) const { + bool isSOPC(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::SOPC; } @@ -507,7 +522,7 @@ public: return MI.getDesc().TSFlags & SIInstrFlags::SOPK; } - bool isSOPK(uint16_t Opcode) const { + bool isSOPK(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::SOPK; } @@ -515,7 +530,7 @@ public: return MI.getDesc().TSFlags & SIInstrFlags::SOPP; } - bool isSOPP(uint16_t Opcode) const { + bool isSOPP(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::SOPP; } @@ -523,7 +538,7 @@ public: return MI.getDesc().TSFlags & SIInstrFlags::IsPacked; } - bool isPacked(uint16_t Opcode) const { + bool isPacked(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::IsPacked; } @@ -531,7 +546,7 @@ public: return MI.getDesc().TSFlags & SIInstrFlags::VOP1; } - bool isVOP1(uint16_t Opcode) const { + bool isVOP1(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::VOP1; } @@ -539,7 +554,7 @@ public: return MI.getDesc().TSFlags & SIInstrFlags::VOP2; } - bool isVOP2(uint16_t Opcode) const { + bool isVOP2(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::VOP2; } @@ -549,13 +564,13 @@ public: static bool isVOP3(const MachineInstr &MI) { return isVOP3(MI.getDesc()); } - bool isVOP3(uint16_t Opcode) const { return isVOP3(get(Opcode)); } + bool isVOP3(uint32_t Opcode) const { return isVOP3(get(Opcode)); } static bool isSDWA(const MachineInstr &MI) { return MI.getDesc().TSFlags & SIInstrFlags::SDWA; } - bool isSDWA(uint16_t Opcode) const { + bool isSDWA(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::SDWA; } @@ -563,7 +578,7 @@ public: return MI.getDesc().TSFlags & SIInstrFlags::VOPC; } - bool isVOPC(uint16_t Opcode) const { + bool isVOPC(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::VOPC; } @@ -571,7 +586,7 @@ public: return MI.getDesc().TSFlags & SIInstrFlags::MUBUF; } - bool isMUBUF(uint16_t Opcode) const { + bool isMUBUF(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::MUBUF; } @@ -579,15 +594,19 @@ public: return MI.getDesc().TSFlags & SIInstrFlags::MTBUF; } - bool isMTBUF(uint16_t Opcode) const { + bool isMTBUF(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::MTBUF; } + static bool isBUF(const MachineInstr &MI) { + return isMUBUF(MI) || isMTBUF(MI); + } + static bool isSMRD(const MachineInstr &MI) { return MI.getDesc().TSFlags & SIInstrFlags::SMRD; } - bool isSMRD(uint16_t Opcode) const { + bool isSMRD(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::SMRD; } @@ -597,33 +616,35 @@ public: return MI.getDesc().TSFlags & SIInstrFlags::DS; } - bool isDS(uint16_t Opcode) const { + bool isDS(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::DS; } static bool isLDSDMA(const MachineInstr &MI) { - return isVALU(MI) && (isMUBUF(MI) || isFLAT(MI)); + return (isVALU(MI) && (isMUBUF(MI) || isFLAT(MI))) || + (MI.getDesc().TSFlags & SIInstrFlags::TENSOR_CNT); } - bool isLDSDMA(uint16_t Opcode) { - return isVALU(Opcode) && (isMUBUF(Opcode) || isFLAT(Opcode)); + bool isLDSDMA(uint32_t Opcode) { + return (isVALU(Opcode) && (isMUBUF(Opcode) || isFLAT(Opcode))) || + (get(Opcode).TSFlags & SIInstrFlags::TENSOR_CNT); } static bool isGWS(const MachineInstr &MI) { return MI.getDesc().TSFlags & SIInstrFlags::GWS; } - bool isGWS(uint16_t Opcode) const { + bool isGWS(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::GWS; } - bool isAlwaysGDS(uint16_t Opcode) const; + bool isAlwaysGDS(uint32_t Opcode) const; static bool isMIMG(const MachineInstr &MI) { return MI.getDesc().TSFlags & SIInstrFlags::MIMG; } - bool isMIMG(uint16_t Opcode) const { + bool isMIMG(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::MIMG; } @@ -631,7 +652,7 @@ public: return MI.getDesc().TSFlags & SIInstrFlags::VIMAGE; } - bool isVIMAGE(uint16_t Opcode) const { + bool isVIMAGE(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::VIMAGE; } @@ -639,7 +660,7 @@ public: return MI.getDesc().TSFlags & SIInstrFlags::VSAMPLE; } - bool isVSAMPLE(uint16_t Opcode) const { + bool isVSAMPLE(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::VSAMPLE; } @@ -647,7 +668,7 @@ public: return MI.getDesc().TSFlags & SIInstrFlags::Gather4; } - bool isGather4(uint16_t Opcode) const { + bool isGather4(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::Gather4; } @@ -662,7 +683,7 @@ public: return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch); } - bool isSegmentSpecificFLAT(uint16_t Opcode) const { + bool isSegmentSpecificFLAT(uint32_t Opcode) const { auto Flags = get(Opcode).TSFlags; return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch); } @@ -671,7 +692,7 @@ public: return MI.getDesc().TSFlags & SIInstrFlags::FlatGlobal; } - bool isFLATGlobal(uint16_t Opcode) const { + bool isFLATGlobal(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::FlatGlobal; } @@ -679,20 +700,20 @@ public: return MI.getDesc().TSFlags & SIInstrFlags::FlatScratch; } - bool isFLATScratch(uint16_t Opcode) const { + bool isFLATScratch(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::FlatScratch; } // Any FLAT encoded instruction, including global_* and scratch_*. - bool isFLAT(uint16_t Opcode) const { + bool isFLAT(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::FLAT; } - /// \returns true for SCRATCH_ instructions, or FLAT_ instructions with - /// SCRATCH_ memory operands. + /// \returns true for SCRATCH_ instructions, or FLAT/BUF instructions unless + /// the MMOs do not include scratch. /// Conservatively correct; will return true if \p MI cannot be proven /// to not hit scratch. - bool mayAccessScratchThroughFlat(const MachineInstr &MI) const; + bool mayAccessScratch(const MachineInstr &MI) const; /// \returns true for FLAT instructions that can access VMEM. bool mayAccessVMEMThroughFlat(const MachineInstr &MI) const; @@ -700,7 +721,7 @@ public: /// \returns true for FLAT instructions that can access LDS. bool mayAccessLDSThroughFlat(const MachineInstr &MI) const; - static bool isBlockLoadStore(uint16_t Opcode) { + static bool isBlockLoadStore(uint32_t Opcode) { switch (Opcode) { case AMDGPU::SI_BLOCK_SPILL_V1024_SAVE: case AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE: @@ -714,6 +735,52 @@ public: } } + static bool setsSCCIfResultIsNonZero(const MachineInstr &MI) { + switch (MI.getOpcode()) { + case AMDGPU::S_ABSDIFF_I32: + case AMDGPU::S_ABS_I32: + case AMDGPU::S_AND_B32: + case AMDGPU::S_AND_B64: + case AMDGPU::S_ANDN2_B32: + case AMDGPU::S_ANDN2_B64: + case AMDGPU::S_ASHR_I32: + case AMDGPU::S_ASHR_I64: + case AMDGPU::S_BCNT0_I32_B32: + case AMDGPU::S_BCNT0_I32_B64: + case AMDGPU::S_BCNT1_I32_B32: + case AMDGPU::S_BCNT1_I32_B64: + case AMDGPU::S_BFE_I32: + case AMDGPU::S_BFE_I64: + case AMDGPU::S_BFE_U32: + case AMDGPU::S_BFE_U64: + case AMDGPU::S_LSHL_B32: + case AMDGPU::S_LSHL_B64: + case AMDGPU::S_LSHR_B32: + case AMDGPU::S_LSHR_B64: + case AMDGPU::S_NAND_B32: + case AMDGPU::S_NAND_B64: + case AMDGPU::S_NOR_B32: + case AMDGPU::S_NOR_B64: + case AMDGPU::S_NOT_B32: + case AMDGPU::S_NOT_B64: + case AMDGPU::S_OR_B32: + case AMDGPU::S_OR_B64: + case AMDGPU::S_ORN2_B32: + case AMDGPU::S_ORN2_B64: + case AMDGPU::S_QUADMASK_B32: + case AMDGPU::S_QUADMASK_B64: + case AMDGPU::S_WQM_B32: + case AMDGPU::S_WQM_B64: + case AMDGPU::S_XNOR_B32: + case AMDGPU::S_XNOR_B64: + case AMDGPU::S_XOR_B32: + case AMDGPU::S_XOR_B64: + return true; + default: + return false; + } + } + static bool isEXP(const MachineInstr &MI) { return MI.getDesc().TSFlags & SIInstrFlags::EXP; } @@ -726,7 +793,7 @@ public: Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND1; } - bool isEXP(uint16_t Opcode) const { + bool isEXP(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::EXP; } @@ -734,7 +801,7 @@ public: return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicNoRet; } - bool isAtomicNoRet(uint16_t Opcode) const { + bool isAtomicNoRet(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::IsAtomicNoRet; } @@ -742,7 +809,7 @@ public: return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicRet; } - bool isAtomicRet(uint16_t Opcode) const { + bool isAtomicRet(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::IsAtomicRet; } @@ -751,13 +818,17 @@ public: SIInstrFlags::IsAtomicNoRet); } - bool isAtomic(uint16_t Opcode) const { + bool isAtomic(uint32_t Opcode) const { return get(Opcode).TSFlags & (SIInstrFlags::IsAtomicRet | SIInstrFlags::IsAtomicNoRet); } static bool mayWriteLDSThroughDMA(const MachineInstr &MI) { - return isLDSDMA(MI) && MI.getOpcode() != AMDGPU::BUFFER_STORE_LDS_DWORD; + unsigned Opc = MI.getOpcode(); + // Exclude instructions that read FROM LDS (not write to it) + return isLDSDMA(MI) && Opc != AMDGPU::BUFFER_STORE_LDS_DWORD && + Opc != AMDGPU::TENSOR_STORE_FROM_LDS && + Opc != AMDGPU::TENSOR_STORE_FROM_LDS_D2; } static bool isSBarrierSCCWrite(unsigned Opcode) { @@ -776,7 +847,7 @@ public: return MI.getDesc().TSFlags & SIInstrFlags::WQM; } - bool isWQM(uint16_t Opcode) const { + bool isWQM(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::WQM; } @@ -784,7 +855,7 @@ public: return MI.getDesc().TSFlags & SIInstrFlags::DisableWQM; } - bool isDisableWQM(uint16_t Opcode) const { + bool isDisableWQM(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::DisableWQM; } @@ -799,7 +870,7 @@ public: (isSpill(MI) && isVALU(MI)); } - bool isVGPRSpill(uint16_t Opcode) const { + bool isVGPRSpill(uint32_t Opcode) const { return Opcode != AMDGPU::SI_SPILL_S32_TO_VGPR && Opcode != AMDGPU::SI_RESTORE_S32_FROM_VGPR && (isSpill(Opcode) && isVALU(Opcode)); @@ -811,13 +882,13 @@ public: (isSpill(MI) && isSALU(MI)); } - bool isSGPRSpill(uint16_t Opcode) const { + bool isSGPRSpill(uint32_t Opcode) const { return Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR || Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR || (isSpill(Opcode) && isSALU(Opcode)); } - bool isSpill(uint16_t Opcode) const { + bool isSpill(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::Spill; } @@ -827,7 +898,7 @@ public: static bool isSpill(const MachineInstr &MI) { return isSpill(MI.getDesc()); } - static bool isWWMRegSpillOpcode(uint16_t Opcode) { + static bool isWWMRegSpillOpcode(uint32_t Opcode) { return Opcode == AMDGPU::SI_SPILL_WWM_V32_SAVE || Opcode == AMDGPU::SI_SPILL_WWM_AV32_SAVE || Opcode == AMDGPU::SI_SPILL_WWM_V32_RESTORE || @@ -843,7 +914,7 @@ public: return MI.getDesc().TSFlags & SIInstrFlags::DPP; } - bool isDPP(uint16_t Opcode) const { + bool isDPP(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::DPP; } @@ -851,7 +922,7 @@ public: return MI.getDesc().TSFlags & SIInstrFlags::TRANS; } - bool isTRANS(uint16_t Opcode) const { + bool isTRANS(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::TRANS; } @@ -859,7 +930,7 @@ public: return MI.getDesc().TSFlags & SIInstrFlags::VOP3P; } - bool isVOP3P(uint16_t Opcode) const { + bool isVOP3P(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::VOP3P; } @@ -867,7 +938,7 @@ public: return MI.getDesc().TSFlags & SIInstrFlags::VINTRP; } - bool isVINTRP(uint16_t Opcode) const { + bool isVINTRP(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::VINTRP; } @@ -877,14 +948,14 @@ public: static bool isMAI(const MachineInstr &MI) { return isMAI(MI.getDesc()); } - bool isMAI(uint16_t Opcode) const { return isMAI(get(Opcode)); } + bool isMAI(uint32_t Opcode) const { return isMAI(get(Opcode)); } static bool isMFMA(const MachineInstr &MI) { return isMAI(MI) && MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 && MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64; } - bool isMFMA(uint16_t Opcode) const { + bool isMFMA(uint32_t Opcode) const { return isMAI(Opcode) && Opcode != AMDGPU::V_ACCVGPR_WRITE_B32_e64 && Opcode != AMDGPU::V_ACCVGPR_READ_B32_e64; } @@ -897,7 +968,7 @@ public: return MI.getDesc().TSFlags & SIInstrFlags::IsWMMA; } - bool isWMMA(uint16_t Opcode) const { + bool isWMMA(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::IsWMMA; } @@ -905,7 +976,7 @@ public: return isMFMA(MI) || isWMMA(MI) || isSWMMAC(MI); } - bool isMFMAorWMMA(uint16_t Opcode) const { + bool isMFMAorWMMA(uint32_t Opcode) const { return isMFMA(Opcode) || isWMMA(Opcode) || isSWMMAC(Opcode); } @@ -913,11 +984,11 @@ public: return MI.getDesc().TSFlags & SIInstrFlags::IsSWMMAC; } - bool isSWMMAC(uint16_t Opcode) const { + bool isSWMMAC(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::IsSWMMAC; } - bool isDOT(uint16_t Opcode) const { + bool isDOT(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::IsDOT; } @@ -931,7 +1002,7 @@ public: return MI.getDesc().TSFlags & SIInstrFlags::LDSDIR; } - bool isLDSDIR(uint16_t Opcode) const { + bool isLDSDIR(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::LDSDIR; } @@ -939,7 +1010,7 @@ public: return MI.getDesc().TSFlags & SIInstrFlags::VINTERP; } - bool isVINTERP(uint16_t Opcode) const { + bool isVINTERP(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::VINTERP; } @@ -955,6 +1026,14 @@ public: return MI.getDesc().TSFlags & SIInstrFlags::LGKM_CNT; } + static bool usesASYNC_CNT(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::ASYNC_CNT; + } + + bool usesASYNC_CNT(uint32_t Opcode) const { + return get(Opcode).TSFlags & SIInstrFlags::ASYNC_CNT; + } + // Most sopk treat the immediate as a signed 16-bit, however some // use it as unsigned. static bool sopkIsZext(unsigned Opcode) { @@ -971,7 +1050,7 @@ public: return MI.getDesc().TSFlags & SIInstrFlags::SCALAR_STORE; } - bool isScalarStore(uint16_t Opcode) const { + bool isScalarStore(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::SCALAR_STORE; } @@ -979,7 +1058,7 @@ public: return MI.getDesc().TSFlags & SIInstrFlags::FIXED_SIZE; } - bool isFixedSize(uint16_t Opcode) const { + bool isFixedSize(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::FIXED_SIZE; } @@ -987,7 +1066,7 @@ public: return MI.getDesc().TSFlags & SIInstrFlags::FPClamp; } - bool hasFPClamp(uint16_t Opcode) const { + bool hasFPClamp(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::FPClamp; } @@ -1007,7 +1086,7 @@ public: return MI.getDesc().TSFlags & SIInstrFlags::FPDPRounding; } - bool usesFPDPRounding(uint16_t Opcode) const { + bool usesFPDPRounding(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::FPDPRounding; } @@ -1015,7 +1094,7 @@ public: return MI.getDesc().TSFlags & SIInstrFlags::FPAtomic; } - bool isFPAtomic(uint16_t Opcode) const { + bool isFPAtomic(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::FPAtomic; } @@ -1060,7 +1139,7 @@ public: return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead; } - bool doesNotReadTiedSource(uint16_t Opcode) const { + bool doesNotReadTiedSource(uint32_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::TiedSourceNotRead; } @@ -1128,13 +1207,13 @@ public: bool isVGPRCopy(const MachineInstr &MI) const { assert(isCopyInstr(MI)); Register Dest = MI.getOperand(0).getReg(); - const MachineFunction &MF = *MI.getParent()->getParent(); + const MachineFunction &MF = *MI.getMF(); const MachineRegisterInfo &MRI = MF.getRegInfo(); return !RI.isSGPRReg(MRI, Dest); } bool hasVGPRUses(const MachineInstr &MI) const { - const MachineFunction &MF = *MI.getParent()->getParent(); + const MachineFunction &MF = *MI.getMF(); const MachineRegisterInfo &MRI = MF.getRegInfo(); return llvm::any_of(MI.explicit_uses(), [&MRI, this](const MachineOperand &MO) { @@ -1303,7 +1382,7 @@ public: /// Return the size in bytes of the operand OpNo on the given // instruction opcode. - unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const { + unsigned getOpSize(uint32_t Opcode, unsigned OpNo) const { const MCOperandInfo &OpInfo = get(Opcode).operands()[OpNo]; if (OpInfo.RegClass == -1) { @@ -1515,6 +1594,8 @@ public: bool isBasicBlockPrologue(const MachineInstr &MI, Register Reg = Register()) const override; + bool canAddToBBProlog(const MachineInstr &MI) const; + MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, @@ -1576,10 +1657,6 @@ public: /// Return true if this opcode should not be used by codegen. bool isAsmOnlyOpcode(int MCOp) const; - const TargetRegisterClass * - getRegClass(const MCInstrDesc &TID, unsigned OpNum, - const TargetRegisterInfo *TRI) const override; - void fixImplicitOperands(MachineInstr &MI) const; MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, @@ -1593,22 +1670,21 @@ public: const MachineInstr &MI, unsigned *PredCost = nullptr) const override; + const MachineOperand &getCalleeOperand(const MachineInstr &MI) const override; + InstructionUniformity - getInstructionUniformity(const MachineInstr &MI) const override final; + getInstructionUniformity(const MachineInstr &MI) const final; InstructionUniformity getGenericInstructionUniformity(const MachineInstr &MI) const; - const MIRFormatter *getMIRFormatter() const override { - if (!Formatter) - Formatter = std::make_unique<AMDGPUMIRFormatter>(); - return Formatter.get(); - } + const MIRFormatter *getMIRFormatter() const override; static unsigned getDSShaderTypeValue(const MachineFunction &MF); const TargetSchedModel &getSchedModel() const { return SchedModel; } + // FIXME: This should be removed // Enforce operand's \p OpName even alignment if required by target. // This is used if an operand is a 32 bit register but needs to be aligned // regardless. @@ -1641,7 +1717,7 @@ TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, /// skipping copy like instructions and subreg-manipulation pseudos. /// Following another subreg of a reg:subreg isn't supported. MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, - MachineRegisterInfo &MRI); + const MachineRegisterInfo &MRI); /// \brief Return false if EXEC is not changed between the def of \p VReg at \p /// DefMI and the use at \p UseMI. Should be run on SSA. Currently does not @@ -1661,86 +1737,86 @@ bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, namespace AMDGPU { LLVM_READONLY - int getVOPe64(uint16_t Opcode); + int64_t getVOPe64(uint32_t Opcode); LLVM_READONLY - int getVOPe32(uint16_t Opcode); + int64_t getVOPe32(uint32_t Opcode); LLVM_READONLY - int getSDWAOp(uint16_t Opcode); + int64_t getSDWAOp(uint32_t Opcode); LLVM_READONLY - int getDPPOp32(uint16_t Opcode); + int64_t getDPPOp32(uint32_t Opcode); LLVM_READONLY - int getDPPOp64(uint16_t Opcode); + int64_t getDPPOp64(uint32_t Opcode); LLVM_READONLY - int getBasicFromSDWAOp(uint16_t Opcode); + int64_t getBasicFromSDWAOp(uint32_t Opcode); LLVM_READONLY - int getCommuteRev(uint16_t Opcode); + int64_t getCommuteRev(uint32_t Opcode); LLVM_READONLY - int getCommuteOrig(uint16_t Opcode); + int64_t getCommuteOrig(uint32_t Opcode); LLVM_READONLY - int getAddr64Inst(uint16_t Opcode); + int64_t getAddr64Inst(uint32_t Opcode); /// Check if \p Opcode is an Addr64 opcode. /// /// \returns \p Opcode if it is an Addr64 opcode, otherwise -1. LLVM_READONLY - int getIfAddr64Inst(uint16_t Opcode); + int64_t getIfAddr64Inst(uint32_t Opcode); LLVM_READONLY - int getSOPKOp(uint16_t Opcode); + int64_t getSOPKOp(uint32_t Opcode); /// \returns SADDR form of a FLAT Global instruction given an \p Opcode /// of a VADDR form. LLVM_READONLY - int getGlobalSaddrOp(uint16_t Opcode); + int64_t getGlobalSaddrOp(uint32_t Opcode); /// \returns VADDR form of a FLAT Global instruction given an \p Opcode /// of a SADDR form. LLVM_READONLY - int getGlobalVaddrOp(uint16_t Opcode); + int64_t getGlobalVaddrOp(uint32_t Opcode); LLVM_READONLY - int getVCMPXNoSDstOp(uint16_t Opcode); + int64_t getVCMPXNoSDstOp(uint32_t Opcode); /// \returns ST form with only immediate offset of a FLAT Scratch instruction /// given an \p Opcode of an SS (SADDR) form. LLVM_READONLY - int getFlatScratchInstSTfromSS(uint16_t Opcode); + int64_t getFlatScratchInstSTfromSS(uint32_t Opcode); /// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode /// of an SVS (SADDR + VADDR) form. LLVM_READONLY - int getFlatScratchInstSVfromSVS(uint16_t Opcode); + int64_t getFlatScratchInstSVfromSVS(uint32_t Opcode); /// \returns SS (SADDR) form of a FLAT Scratch instruction given an \p Opcode /// of an SV (VADDR) form. LLVM_READONLY - int getFlatScratchInstSSfromSV(uint16_t Opcode); + int64_t getFlatScratchInstSSfromSV(uint32_t Opcode); /// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode /// of an SS (SADDR) form. LLVM_READONLY - int getFlatScratchInstSVfromSS(uint16_t Opcode); + int64_t getFlatScratchInstSVfromSS(uint32_t Opcode); /// \returns earlyclobber version of a MAC MFMA is exists. LLVM_READONLY - int getMFMAEarlyClobberOp(uint16_t Opcode); + int64_t getMFMAEarlyClobberOp(uint32_t Opcode); /// \returns Version of an MFMA instruction which uses AGPRs for srcC and /// vdst, given an \p Opcode of an MFMA which uses VGPRs for srcC/vdst. LLVM_READONLY - int getMFMASrcCVDstAGPROp(uint16_t Opcode); + int64_t getMFMASrcCVDstAGPROp(uint32_t Opcode); /// \returns v_cmpx version of a v_cmp instruction. LLVM_READONLY - int getVCMPXOpFromVCMP(uint16_t Opcode); + int64_t getVCMPXOpFromVCMP(uint32_t Opcode); const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL; const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19); |
