Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIInstrInfo.cpp')
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 101
 1 file changed, 97 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 3f61bbd..19e6bcf 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -18,6 +18,7 @@
 #include "GCNSubtarget.h"
 #include "SIMachineFunctionInfo.h"
 #include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
 #include "llvm/CodeGen/LiveIntervals.h"
@@ -5534,6 +5535,15 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
     }
   }
 
+  // See SIInstrInfo::isLegalGFX12PlusPackedMathFP32Operand for more
+  // information.
+  if (AMDGPU::isPackedFP32Inst(Opcode) && AMDGPU::isGFX12Plus(ST)) {
+    for (unsigned I = 0; I < 3; ++I) {
+      if (!isLegalGFX12PlusPackedMathFP32Operand(MRI, MI, I))
+        return false;
+    }
+  }
+
   return true;
 }
 
@@ -6005,6 +6015,21 @@ bool SIInstrInfo::isLegalRegOperand(const MachineInstr &MI, unsigned OpIdx,
   const MCOperandInfo OpInfo = MI.getDesc().operands()[OpIdx];
   unsigned Opc = MI.getOpcode();
 
+  // See SIInstrInfo::isLegalGFX12PlusPackedMathFP32Operand for more
+  // information.
+  if (AMDGPU::isPackedFP32Inst(MI.getOpcode()) && AMDGPU::isGFX12Plus(ST) &&
+      MO.isReg() && RI.isSGPRReg(MRI, MO.getReg())) {
+    constexpr const AMDGPU::OpName OpNames[] = {
+        AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2};
+
+    for (auto [I, OpName] : enumerate(OpNames)) {
+      int SrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[I]);
+      if (static_cast<unsigned>(SrcIdx) == OpIdx &&
+          !isLegalGFX12PlusPackedMathFP32Operand(MRI, MI, I, &MO))
+        return false;
+    }
+  }
+
   if (!isLegalRegOperand(MRI, OpInfo, MO))
     return false;
 
@@ -6053,6 +6078,39 @@ bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI,
   return true;
 }
 
+bool SIInstrInfo::isLegalGFX12PlusPackedMathFP32Operand(
+    const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN,
+    const MachineOperand *MO) const {
+  constexpr const unsigned NumOps = 3;
+  constexpr const AMDGPU::OpName OpNames[NumOps * 2] = {
+      AMDGPU::OpName::src0,           AMDGPU::OpName::src1,
+      AMDGPU::OpName::src2,           AMDGPU::OpName::src0_modifiers,
+      AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers};
+
+  assert(SrcN < NumOps);
+
+  if (!MO) {
+    int SrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[SrcN]);
+    if (SrcIdx == -1)
+      return true;
+    MO = &MI.getOperand(SrcIdx);
+  }
+
+  if (!MO->isReg() || !RI.isSGPRReg(MRI, MO->getReg()))
+    return true;
+
+  int ModsIdx =
+      AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[NumOps + SrcN]);
+  if (ModsIdx == -1)
+    return true;
+
+  unsigned Mods = MI.getOperand(ModsIdx).getImm();
+  bool OpSel = Mods & SISrcMods::OP_SEL_0;
+  bool OpSelHi = Mods & SISrcMods::OP_SEL_1;
+
+  return !OpSel && !OpSelHi;
+}
+
 bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
                                  const MachineOperand *MO) const {
   const MachineFunction &MF = *MI.getParent()->getParent();
@@ -6122,10 +6180,11 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
           !Op.isIdenticalTo(*MO))
         return false;
 
-      // Do not fold a frame index into an instruction that already has a frame
-      // index. The frame index handling code doesn't handle fixing up operand
-      // constraints if there are multiple indexes.
-      if (Op.isFI() && MO->isFI())
+      // Do not fold a non-inlineable, non-register operand into an
+      // instruction that already has a frame index. The frame index handling
+      // code does not cope well when a frame index coexists with another
+      // non-register operand, unless that operand is an inlineable immediate.
+      if (Op.isFI())
         return false;
     }
   } else if (IsInlineConst && ST.hasNoF16PseudoScalarTransInlineConstants() &&
@@ -6389,6 +6448,15 @@ void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
   if ((Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_e64) &&
       !RI.isVGPR(MRI, MI.getOperand(VOP3Idx[2]).getReg()))
     legalizeOpWithMove(MI, VOP3Idx[2]);
+
+  // Fix the register class of packed FP32 instructions on gfx12+. See
+  // SIInstrInfo::isLegalGFX12PlusPackedMathFP32Operand for more information.
+  if (AMDGPU::isPackedFP32Inst(Opc) && AMDGPU::isGFX12Plus(ST)) {
+    for (unsigned I = 0; I < 3; ++I) {
+      if (!isLegalGFX12PlusPackedMathFP32Operand(MRI, MI, /*SrcN=*/I))
+        legalizeOpWithMove(MI, VOP3Idx[I]);
+    }
+  }
 }
 
 Register SIInstrInfo::readlaneVGPRToSGPR(
@@ -10073,7 +10141,30 @@ unsigned SIInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
 
 InstructionUniformity
 SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const {
+  const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
   unsigned opcode = MI.getOpcode();
+
+  auto HandleAddrSpaceCast = [this, &MRI](const MachineInstr &MI) {
+    Register Dst = MI.getOperand(0).getReg();
+    Register Src = isa<GIntrinsic>(MI) ? MI.getOperand(2).getReg()
+                                       : MI.getOperand(1).getReg();
+    LLT DstTy = MRI.getType(Dst);
+    LLT SrcTy = MRI.getType(Src);
+    unsigned DstAS = DstTy.getAddressSpace();
+    unsigned SrcAS = SrcTy.getAddressSpace();
+    return SrcAS == AMDGPUAS::PRIVATE_ADDRESS &&
+                   DstAS == AMDGPUAS::FLAT_ADDRESS &&
+                   ST.hasGloballyAddressableScratch()
+               ? InstructionUniformity::NeverUniform
+               : InstructionUniformity::Default;
+  };
+
+  // If the target supports globally addressable scratch, the mapping from
+  // scratch memory to the flat aperture changes; therefore, an address space
+  // cast is no longer uniform.
+  if (opcode == TargetOpcode::G_ADDRSPACE_CAST)
+    return HandleAddrSpaceCast(MI);
+
   if (auto *GI = dyn_cast<GIntrinsic>(&MI)) {
     auto IID = GI->getIntrinsicID();
     if (AMDGPU::isIntrinsicSourceOfDivergence(IID))
@@ -10082,6 +10173,8 @@ SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const {
       return InstructionUniformity::AlwaysUniform;
 
     switch (IID) {
+    case Intrinsic::amdgcn_addrspacecast_nonnull:
+      return HandleAddrSpaceCast(MI);
     case Intrinsic::amdgcn_if:
     case Intrinsic::amdgcn_else:
       // FIXME: Uniform if second result
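
The packed-FP32 hunks above all enforce one rule: on gfx12+, when a source operand of a packed FP32 instruction (one matched by AMDGPU::isPackedFP32Inst) is an SGPR, its op_sel and op_sel_hi source modifiers must both be clear; otherwise the operand has to be rewritten through legalizeOpWithMove. The sketch below restates that predicate in isolation. It is a minimal illustration, the helper name isLegalPackedFP32Src is made up for the sketch, and the OP_SEL_0/OP_SEL_1 bit positions are assumed for the example rather than taken from the real SISrcMods encoding.

// Minimal, self-contained restatement of the legality predicate in the diff:
// an SGPR source of a gfx12+ packed FP32 instruction is legal only if neither
// OP_SEL_0 nor OP_SEL_1 is set in its src*_modifiers immediate.
// NOTE: the bit positions below are illustrative assumptions.
#include <cstdint>
#include <cstdio>

constexpr uint32_t OP_SEL_0 = 1u << 2; // assumed position of op_sel
constexpr uint32_t OP_SEL_1 = 1u << 3; // assumed position of op_sel_hi

// Mirrors the tail of isLegalGFX12PlusPackedMathFP32Operand: VGPRs and
// immediates are always fine; an SGPR is fine only with no op_sel bits set.
bool isLegalPackedFP32Src(bool SrcIsSGPR, uint32_t SrcMods) {
  if (!SrcIsSGPR)
    return true;
  return (SrcMods & (OP_SEL_0 | OP_SEL_1)) == 0;
}

int main() {
  std::printf("%d\n", isLegalPackedFP32Src(true, 0));         // 1: legal as-is
  std::printf("%d\n", isLegalPackedFP32Src(true, OP_SEL_1));   // 0: needs a VGPR copy
  std::printf("%d\n", isLegalPackedFP32Src(false, OP_SEL_1));  // 1: not an SGPR
  return 0;
}

In the patch itself, verifyInstruction uses the predicate to reject such operands outright, while legalizeOperandsVOP3 repairs them by copying the offending SGPR into a VGPR via legalizeOpWithMove.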
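The uniformity hunks make a private-to-flat address space cast non-uniform whenever the subtarget has globally addressable scratch, both for G_ADDRSPACE_CAST and for the amdgcn.addrspacecast.nonnull intrinsic. The following sketch captures just that decision; the function name and the address space numbers are assumptions for the example, not the AMDGPUAS definitions.

// Standalone sketch of the new addrspacecast uniformity rule from the diff.
// Address space IDs are assumed for illustration (flat = 0, private = 5).
#include <cstdio>

enum : unsigned { FLAT_ADDRESS = 0, PRIVATE_ADDRESS = 5 };
enum class InstructionUniformity { Default, AlwaysUniform, NeverUniform };

// private -> flat casts stop being uniform once scratch becomes globally
// addressable; every other cast keeps the default operand-based analysis.
InstructionUniformity addrSpaceCastUniformity(unsigned SrcAS, unsigned DstAS,
                                              bool GloballyAddressableScratch) {
  return SrcAS == PRIVATE_ADDRESS && DstAS == FLAT_ADDRESS &&
                 GloballyAddressableScratch
             ? InstructionUniformity::NeverUniform
             : InstructionUniformity::Default;
}

int main() {
  bool Divergent =
      addrSpaceCastUniformity(PRIVATE_ADDRESS, FLAT_ADDRESS,
                              /*GloballyAddressableScratch=*/true) ==
      InstructionUniformity::NeverUniform;
  std::printf("private->flat divergent: %d\n", Divergent); // prints 1
  return 0;
}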