Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIInstrInfo.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 34
1 file changed, 30 insertions, 4 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 3f61bbd..f20b22d 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6122,10 +6122,11 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
           !Op.isIdenticalTo(*MO))
         return false;
 
-      // Do not fold a frame index into an instruction that already has a frame
-      // index. The frame index handling code doesn't handle fixing up operand
-      // constraints if there are multiple indexes.
-      if (Op.isFI() && MO->isFI())
+      // Do not fold a non-inlineable, non-register operand into an
+      // instruction that already has a frame index. The frame index handling
+      // code cannot handle a frame index co-existing with another
+      // non-register operand, unless that operand is an inlineable immediate.
+      if (Op.isFI())
         return false;
     }
   } else if (IsInlineConst && ST.hasNoF16PseudoScalarTransInlineConstants() &&
@@ -10073,7 +10074,30 @@ unsigned SIInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
 
 InstructionUniformity
 SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const {
+  const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
   unsigned opcode = MI.getOpcode();
+
+  auto HandleAddrSpaceCast = [this, &MRI](const MachineInstr &MI) {
+    Register Dst = MI.getOperand(0).getReg();
+    Register Src = isa<GIntrinsic>(MI) ? MI.getOperand(2).getReg()
+                                       : MI.getOperand(1).getReg();
+    LLT DstTy = MRI.getType(Dst);
+    LLT SrcTy = MRI.getType(Src);
+    unsigned DstAS = DstTy.getAddressSpace();
+    unsigned SrcAS = SrcTy.getAddressSpace();
+    return SrcAS == AMDGPUAS::PRIVATE_ADDRESS &&
+                   DstAS == AMDGPUAS::FLAT_ADDRESS &&
+                   ST.hasGloballyAddressableScratch()
+               ? InstructionUniformity::NeverUniform
+               : InstructionUniformity::Default;
+  };
+
+  // If the target supports globally addressable scratch, the mapping from
+  // scratch memory to the flat aperture changes, so an address space cast
+  // is no longer uniform.
+  if (opcode == TargetOpcode::G_ADDRSPACE_CAST)
+    return HandleAddrSpaceCast(MI);
+
   if (auto *GI = dyn_cast<GIntrinsic>(&MI)) {
     auto IID = GI->getIntrinsicID();
     if (AMDGPU::isIntrinsicSourceOfDivergence(IID))
@@ -10082,6 +10106,8 @@ SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const {
       return InstructionUniformity::AlwaysUniform;
 
     switch (IID) {
+    case Intrinsic::amdgcn_addrspacecast_nonnull:
+      return HandleAddrSpaceCast(MI);
     case Intrinsic::amdgcn_if:
     case Intrinsic::amdgcn_else:
       // FIXME: Uniform if second result
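
As context for the second and third hunks: the patch classifies an address space cast as NeverUniform only when the source is the private (scratch) address space, the destination is the flat address space, and the subtarget reports globally addressable scratch. The standalone C++ sketch below restates that decision outside of LLVM; castUniformity, FlatAS, and PrivateAS are illustrative stand-ins (the address-space numbers 0 and 5 are assumed to match AMDGPUAS::FLAT_ADDRESS and AMDGPUAS::PRIVATE_ADDRESS), not code from the patch.

    // Standalone sketch, not part of the patch: the uniformity rule applied to
    // an addrspacecast, reduced to its inputs.
    #include <cstdio>

    enum class InstructionUniformity { Default, NeverUniform };

    constexpr unsigned FlatAS = 0;    // assumed AMDGPUAS::FLAT_ADDRESS
    constexpr unsigned PrivateAS = 5; // assumed AMDGPUAS::PRIVATE_ADDRESS

    // With globally addressable scratch, the mapping from scratch memory to
    // the flat aperture changes, so a private -> flat cast can no longer be
    // assumed uniform; every other case keeps the default classification.
    InstructionUniformity castUniformity(unsigned SrcAS, unsigned DstAS,
                                         bool HasGloballyAddressableScratch) {
      if (SrcAS == PrivateAS && DstAS == FlatAS && HasGloballyAddressableScratch)
        return InstructionUniformity::NeverUniform;
      return InstructionUniformity::Default;
    }

    int main() {
      auto name = [](InstructionUniformity U) {
        return U == InstructionUniformity::NeverUniform ? "NeverUniform"
                                                        : "Default";
      };
      // Globally addressable scratch: the cast result is treated as divergent.
      std::printf("private->flat, GAS on : %s\n",
                  name(castUniformity(PrivateAS, FlatAS, true)));
      // Without it, the generic uniformity analysis applies unchanged.
      std::printf("private->flat, GAS off: %s\n",
                  name(castUniformity(PrivateAS, FlatAS, false)));
      return 0;
    }

The same predicate is reached through two paths in the patch: directly for TargetOpcode::G_ADDRSPACE_CAST, and via the switch for the amdgcn_addrspacecast_nonnull intrinsic, whose source pointer sits at operand index 2 rather than 1.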