aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp22
1 files changed, 16 insertions, 6 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 97c2c9c..9ce1224 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2006,19 +2006,27 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
MachineInstr &MI, const AMDGPU::ImageDimIntrinsicInfo *Intr) const {
MachineBasicBlock *MBB = MI.getParent();
const DebugLoc &DL = MI.getDebugLoc();
+ unsigned IntrOpcode = Intr->BaseOpcode;
+
+ // For image atomic: use no-return opcode if result is unused.
+ if (Intr->AtomicNoRetBaseOpcode != Intr->BaseOpcode) {
+ Register ResultDef = MI.getOperand(0).getReg();
+ if (MRI->use_nodbg_empty(ResultDef))
+ IntrOpcode = Intr->AtomicNoRetBaseOpcode;
+ }
const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
- AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
+ AMDGPU::getMIMGBaseOpcodeInfo(IntrOpcode);
const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);
- unsigned IntrOpcode = Intr->BaseOpcode;
const bool IsGFX10Plus = AMDGPU::isGFX10Plus(STI);
const bool IsGFX11Plus = AMDGPU::isGFX11Plus(STI);
const bool IsGFX12Plus = AMDGPU::isGFX12Plus(STI);
const unsigned ArgOffset = MI.getNumExplicitDefs() + 1;
- Register VDataIn, VDataOut;
+ Register VDataIn = AMDGPU::NoRegister;
+ Register VDataOut = AMDGPU::NoRegister;
LLT VDataTy;
int NumVDataDwords = -1;
bool IsD16 = MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 ||
@@ -2049,7 +2057,8 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
unsigned DMaskLanes = 0;
if (BaseOpcode->Atomic) {
- VDataOut = MI.getOperand(0).getReg();
+ if (!BaseOpcode->NoReturn)
+ VDataOut = MI.getOperand(0).getReg();
VDataIn = MI.getOperand(2).getReg();
LLT Ty = MRI->getType(VDataIn);
@@ -2099,8 +2108,9 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
assert((!IsTexFail || DMaskLanes >= 1) && "should have legalized this");
unsigned CPol = MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm();
- if (BaseOpcode->Atomic)
- CPol |= AMDGPU::CPol::GLC; // TODO no-return optimization
+ // Keep GLC only when the atomic's result is actually used.
+ if (BaseOpcode->Atomic && !BaseOpcode->NoReturn)
+ CPol |= AMDGPU::CPol::GLC;
if (CPol & ~((IsGFX12Plus ? AMDGPU::CPol::ALL : AMDGPU::CPol::ALL_pregfx12) |
AMDGPU::CPol::VOLATILE))
return false;