Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIInstrInfo.cpp')
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp  34
1 file changed, 30 insertions, 4 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 3f61bbd..f20b22d 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6122,10 +6122,11 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
!Op.isIdenticalTo(*MO))
return false;
- // Do not fold a frame index into an instruction that already has a frame
- // index. The frame index handling code doesn't handle fixing up operand
- // constraints if there are multiple indexes.
- if (Op.isFI() && MO->isFI())
+ // Do not fold a non-inlineable, non-register operand into an
+ // instruction that already has a frame index. The frame index handling
+ // code does not cope with a frame index coexisting with another
+ // non-register operand unless that operand is an inlineable immediate.
+ if (Op.isFI())
return false;
}
} else if (IsInlineConst && ST.hasNoF16PseudoScalarTransInlineConstants() &&
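The tightened rule in the hunk above can be restated as a small predicate. The sketch below is illustrative only; the helper name and boolean parameters are hypothetical and not part of the SIInstrInfo API.

// Hypothetical restatement of the rule: once another operand of the
// instruction is already a frame index, only a register or an inlineable
// immediate may be folded into the operand being legalized.
bool mayFoldOperandSketch(bool NewOpIsReg, bool NewOpIsInlineImm,
                          bool OtherOpIsFrameIndex) {
  if (OtherOpIsFrameIndex && !NewOpIsReg && !NewOpIsInlineImm)
    return false; // mirrors the new "if (Op.isFI()) return false;" bail-out
  return true;
}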
@@ -10073,7 +10074,30 @@ unsigned SIInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
InstructionUniformity
SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const {
+ const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
unsigned opcode = MI.getOpcode();
+
+ auto HandleAddrSpaceCast = [this, &MRI](const MachineInstr &MI) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = isa<GIntrinsic>(MI) ? MI.getOperand(2).getReg()
+ : MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(Dst);
+ LLT SrcTy = MRI.getType(Src);
+ unsigned DstAS = DstTy.getAddressSpace();
+ unsigned SrcAS = SrcTy.getAddressSpace();
+ return SrcAS == AMDGPUAS::PRIVATE_ADDRESS &&
+ DstAS == AMDGPUAS::FLAT_ADDRESS &&
+ ST.hasGloballyAddressableScratch()
+ ? InstructionUniformity::NeverUniform
+ : InstructionUniformity::Default;
+ };
+
+ // If the target supports globally addressable scratch, the mapping from
+ // scratch memory into the flat aperture changes, so an address space cast
+ // from private to flat is no longer uniform.
+ if (opcode == TargetOpcode::G_ADDRSPACE_CAST)
+ return HandleAddrSpaceCast(MI);
+
if (auto *GI = dyn_cast<GIntrinsic>(&MI)) {
auto IID = GI->getIntrinsicID();
if (AMDGPU::isIntrinsicSourceOfDivergence(IID))
@@ -10082,6 +10106,8 @@ SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const {
return InstructionUniformity::AlwaysUniform;
switch (IID) {
+ case Intrinsic::amdgcn_addrspacecast_nonnull:
+ return HandleAddrSpaceCast(MI);
case Intrinsic::amdgcn_if:
case Intrinsic::amdgcn_else:
// FIXME: Uniform if second result
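For reference, the uniformity rule added in the second hunk can be sketched standalone. Everything below is illustrative: the names and the address-space numbers (assumed to mirror AMDGPUAS::FLAT_ADDRESS == 0 and AMDGPUAS::PRIVATE_ADDRESS == 5) are assumptions, not part of the patch.

enum class SketchUniformity { Default, NeverUniform };

// Assumed AMDGPU address-space numbering for this sketch.
constexpr unsigned kFlatAS = 0;    // flat
constexpr unsigned kPrivateAS = 5; // private (scratch)

// With globally addressable scratch, the mapping from scratch memory into the
// flat aperture changes, so a private-to-flat cast can no longer be assumed
// uniform; every other cast keeps the default classification.
SketchUniformity classifyAddrSpaceCast(unsigned SrcAS, unsigned DstAS,
                                       bool HasGloballyAddressableScratch) {
  if (SrcAS == kPrivateAS && DstAS == kFlatAS && HasGloballyAddressableScratch)
    return SketchUniformity::NeverUniform;
  return SketchUniformity::Default;
}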