aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIISelLowering.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp37
1 files changed, 28 insertions, 9 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index be42291..b34ab2a 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -9134,16 +9134,23 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
SDLoc DL(Op);
MachineFunction &MF = DAG.getMachineFunction();
const GCNSubtarget *ST = &MF.getSubtarget<GCNSubtarget>();
+ unsigned IntrOpcode = Intr->BaseOpcode;
+ // For image atomic: use no-return opcode if result is unused.
+ if (Intr->AtomicNoRetBaseOpcode != Intr->BaseOpcode &&
+ !Op.getNode()->hasAnyUseOfValue(0))
+ IntrOpcode = Intr->AtomicNoRetBaseOpcode;
const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
- AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
+ AMDGPU::getMIMGBaseOpcodeInfo(IntrOpcode);
const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);
- unsigned IntrOpcode = Intr->BaseOpcode;
bool IsGFX10Plus = AMDGPU::isGFX10Plus(*Subtarget);
bool IsGFX11Plus = AMDGPU::isGFX11Plus(*Subtarget);
bool IsGFX12Plus = AMDGPU::isGFX12Plus(*Subtarget);
SmallVector<EVT, 3> ResultTypes(Op->values());
SmallVector<EVT, 3> OrigResultTypes(Op->values());
+ if (BaseOpcode->NoReturn && BaseOpcode->Atomic)
+ ResultTypes.erase(&ResultTypes[0]);
+
bool IsD16 = false;
bool IsG16 = false;
bool IsA16 = false;
@@ -9162,8 +9169,10 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
VData = Op.getOperand(2);
IsAtomicPacked16Bit =
- (Intr->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_F16 ||
- Intr->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_BF16);
+ (IntrOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_F16 ||
+ IntrOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_F16_NORTN ||
+ IntrOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_BF16 ||
+ IntrOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_BF16_NORTN);
bool Is64Bit = VData.getValueSizeInBits() == 64;
if (BaseOpcode->AtomicX2) {
@@ -9173,7 +9182,9 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
if (Is64Bit)
VData = DAG.getBitcast(MVT::v4i32, VData);
- ResultTypes[0] = Is64Bit ? MVT::v2i64 : MVT::v2i32;
+ if (!BaseOpcode->NoReturn)
+ ResultTypes[0] = Is64Bit ? MVT::v2i64 : MVT::v2i32;
+
DMask = Is64Bit ? 0xf : 0x3;
NumVDataDwords = Is64Bit ? 4 : 2;
} else {
@@ -9399,8 +9410,9 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
}
unsigned CPol = Op.getConstantOperandVal(ArgOffset + Intr->CachePolicyIndex);
- if (BaseOpcode->Atomic)
- CPol |= AMDGPU::CPol::GLC; // TODO no-return optimization
+ // Keep GLC only when the atomic's result is actually used.
+ if (BaseOpcode->Atomic && !BaseOpcode->NoReturn)
+ CPol |= AMDGPU::CPol::GLC;
if (CPol & ~((IsGFX12Plus ? AMDGPU::CPol::ALL : AMDGPU::CPol::ALL_pregfx12) |
AMDGPU::CPol::VOLATILE))
return Op;
@@ -9512,13 +9524,20 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
DAG.setNodeMemRefs(NewNode, {MemRef});
}
+ if (BaseOpcode->NoReturn) {
+ if (BaseOpcode->Atomic)
+ return DAG.getMergeValues(
+ {DAG.getPOISON(OrigResultTypes[0]), SDValue(NewNode, 0)}, DL);
+
+ return SDValue(NewNode, 0);
+ }
+
if (BaseOpcode->AtomicX2) {
SmallVector<SDValue, 1> Elt;
DAG.ExtractVectorElements(SDValue(NewNode, 0), Elt, 0, 1);
return DAG.getMergeValues({Elt[0], SDValue(NewNode, 1)}, DL);
}
- if (BaseOpcode->NoReturn)
- return SDValue(NewNode, 0);
+
return constructRetValue(DAG, NewNode, OrigResultTypes, IsTexFail,
Subtarget->hasUnpackedD16VMem(), IsD16, DMaskLanes,
NumVDataDwords, IsAtomicPacked16Bit, DL);