aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIInstrInfo.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.cpp101
1 files changed, 97 insertions, 4 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 3f61bbd..19e6bcf 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -18,6 +18,7 @@
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/LiveIntervals.h"
@@ -5534,6 +5535,15 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
}
+ // See SIInstrInfo::isLegalGFX12PlusPackedMathFP32Operand for more
+ // information.
+ if (AMDGPU::isPackedFP32Inst(Opcode) && AMDGPU::isGFX12Plus(ST)) {
+ for (unsigned I = 0; I < 3; ++I) {
+ if (!isLegalGFX12PlusPackedMathFP32Operand(MRI, MI, I))
+ return false;
+ }
+ }
+
return true;
}
@@ -6005,6 +6015,21 @@ bool SIInstrInfo::isLegalRegOperand(const MachineInstr &MI, unsigned OpIdx,
const MCOperandInfo OpInfo = MI.getDesc().operands()[OpIdx];
unsigned Opc = MI.getOpcode();
+ // See SIInstrInfo::isLegalGFX12PlusPackedMathFP32Operand for more
+ // information.
+ if (AMDGPU::isPackedFP32Inst(MI.getOpcode()) && AMDGPU::isGFX12Plus(ST) &&
+ MO.isReg() && RI.isSGPRReg(MRI, MO.getReg())) {
+ constexpr const AMDGPU::OpName OpNames[] = {
+ AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2};
+
+ for (auto [I, OpName] : enumerate(OpNames)) {
+ int SrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[I]);
+ if (static_cast<unsigned>(SrcIdx) == OpIdx &&
+ !isLegalGFX12PlusPackedMathFP32Operand(MRI, MI, I, &MO))
+ return false;
+ }
+ }
+
if (!isLegalRegOperand(MRI, OpInfo, MO))
return false;
@@ -6053,6 +6078,39 @@ bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI,
return true;
}
+bool SIInstrInfo::isLegalGFX12PlusPackedMathFP32Operand(
+ const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN,
+ const MachineOperand *MO) const {
+ constexpr const unsigned NumOps = 3;
+ constexpr const AMDGPU::OpName OpNames[NumOps * 2] = {
+ AMDGPU::OpName::src0, AMDGPU::OpName::src1,
+ AMDGPU::OpName::src2, AMDGPU::OpName::src0_modifiers,
+ AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers};
+
+ assert(SrcN < NumOps);
+
+ if (!MO) {
+ int SrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[SrcN]);
+ if (SrcIdx == -1)
+ return true;
+ MO = &MI.getOperand(SrcIdx);
+ }
+
+ if (!MO->isReg() || !RI.isSGPRReg(MRI, MO->getReg()))
+ return true;
+
+ int ModsIdx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[NumOps + SrcN]);
+ if (ModsIdx == -1)
+ return true;
+
+ unsigned Mods = MI.getOperand(ModsIdx).getImm();
+ bool OpSel = Mods & SISrcMods::OP_SEL_0;
+ bool OpSelHi = Mods & SISrcMods::OP_SEL_1;
+
+ return !OpSel && !OpSelHi;
+}
+
bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
const MachineOperand *MO) const {
const MachineFunction &MF = *MI.getParent()->getParent();
@@ -6122,10 +6180,11 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
!Op.isIdenticalTo(*MO))
return false;
- // Do not fold a frame index into an instruction that already has a frame
- // index. The frame index handling code doesn't handle fixing up operand
- // constraints if there are multiple indexes.
- if (Op.isFI() && MO->isFI())
+ // Do not fold a non-inlineable and non-register operand into an
+ // instruction that already has a frame index. The frame index handling
+ // code could not handle well when a frame index co-exists with another
+ // non-register operand, unless that operand is an inlineable immediate.
+ if (Op.isFI())
return false;
}
} else if (IsInlineConst && ST.hasNoF16PseudoScalarTransInlineConstants() &&
@@ -6389,6 +6448,15 @@ void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
if ((Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_e64) &&
!RI.isVGPR(MRI, MI.getOperand(VOP3Idx[2]).getReg()))
legalizeOpWithMove(MI, VOP3Idx[2]);
+
+ // Fix the register class of packed FP32 instructions on gfx12+. See
+ // SIInstrInfo::isLegalGFX12PlusPackedMathFP32Operand for more information.
+ if (AMDGPU::isPackedFP32Inst(Opc) && AMDGPU::isGFX12Plus(ST)) {
+ for (unsigned I = 0; I < 3; ++I) {
+ if (!isLegalGFX12PlusPackedMathFP32Operand(MRI, MI, /*SrcN=*/I))
+ legalizeOpWithMove(MI, VOP3Idx[I]);
+ }
+ }
}
Register SIInstrInfo::readlaneVGPRToSGPR(
@@ -10073,7 +10141,30 @@ unsigned SIInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
InstructionUniformity
SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const {
+ const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
unsigned opcode = MI.getOpcode();
+
+ auto HandleAddrSpaceCast = [this, &MRI](const MachineInstr &MI) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = isa<GIntrinsic>(MI) ? MI.getOperand(2).getReg()
+ : MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(Dst);
+ LLT SrcTy = MRI.getType(Src);
+ unsigned DstAS = DstTy.getAddressSpace();
+ unsigned SrcAS = SrcTy.getAddressSpace();
+ return SrcAS == AMDGPUAS::PRIVATE_ADDRESS &&
+ DstAS == AMDGPUAS::FLAT_ADDRESS &&
+ ST.hasGloballyAddressableScratch()
+ ? InstructionUniformity::NeverUniform
+ : InstructionUniformity::Default;
+ };
+
+ // If the target supports globally addressable scratch, the mapping from
+ // scratch memory to the flat aperture changes therefore an address space cast
+ // is no longer uniform.
+ if (opcode == TargetOpcode::G_ADDRSPACE_CAST)
+ return HandleAddrSpaceCast(MI);
+
if (auto *GI = dyn_cast<GIntrinsic>(&MI)) {
auto IID = GI->getIntrinsicID();
if (AMDGPU::isIntrinsicSourceOfDivergence(IID))
@@ -10082,6 +10173,8 @@ SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const {
return InstructionUniformity::AlwaysUniform;
switch (IID) {
+ case Intrinsic::amdgcn_addrspacecast_nonnull:
+ return HandleAddrSpaceCast(MI);
case Intrinsic::amdgcn_if:
case Intrinsic::amdgcn_else:
// FIXME: Uniform if second result