aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib
diff options
context:
space:
mode:
authorSebastian Neubauer <Sebastian.Neubauer@amd.com>2021-12-21 17:27:14 +0100
committerSebastian Neubauer <Sebastian.Neubauer@amd.com>2022-01-21 12:09:07 +0100
commit603d18033c510c99ad84f26b6603db1ca68a500f (patch)
tree6595a11948adab5311b561b689bc473cb9cc482a /llvm/lib
parent0530fdbbbb84ea3024a4a8f7156ff716f00ffd48 (diff)
downloadllvm-603d18033c510c99ad84f26b6603db1ca68a500f.zip
llvm-603d18033c510c99ad84f26b6603db1ca68a500f.tar.gz
llvm-603d18033c510c99ad84f26b6603db1ca68a500f.tar.bz2
[AMDGPU][InstCombine] Remove zero LOD bias
If the bias is zero, we can remove it from the image instruction. Also copy other image optimizations (l->lz, mip->nomip) to IR combines. Differential Revision: https://reviews.llvm.org/D116042
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp138
-rw-r--r--llvm/lib/Target/AMDGPU/MIMGInstructions.td34
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp1
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h9
4 files changed, 148 insertions, 34 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 5eb7cf8..84363d3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -97,10 +97,92 @@ static Value *convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder) {
llvm_unreachable("Should never be called!");
}
+/// Applies Function(II.Args, II.ArgTys) and replaces the intrinsic call with
+/// the modified arguments.
+static Optional<Instruction *> modifyIntrinsicCall(
+ IntrinsicInst &II, unsigned NewIntr, InstCombiner &IC,
+ std::function<void(SmallVectorImpl<Value *> &, SmallVectorImpl<Type *> &)>
+ Func) {
+ SmallVector<Type *, 4> ArgTys;
+ if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), ArgTys))
+ return None;
+
+ SmallVector<Value *, 8> Args(II.args());
+
+ // Modify arguments and types
+ Func(Args, ArgTys);
+
+ Function *I = Intrinsic::getDeclaration(II.getModule(), NewIntr, ArgTys);
+
+ CallInst *NewCall = IC.Builder.CreateCall(I, Args);
+ NewCall->takeName(&II);
+ NewCall->copyMetadata(II);
+ if (isa<FPMathOperator>(NewCall))
+ NewCall->copyFastMathFlags(&II);
+
+ // Erase and replace uses
+ if (!II.getType()->isVoidTy())
+ IC.replaceInstUsesWith(II, NewCall);
+ return IC.eraseInstFromFunction(II);
+}
+
static Optional<Instruction *>
simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr,
IntrinsicInst &II, InstCombiner &IC) {
+ // Optimize _L to _LZ when _L is zero
+ if (const auto *LZMappingInfo =
+ AMDGPU::getMIMGLZMappingInfo(ImageDimIntr->BaseOpcode)) {
+ if (auto *ConstantLod =
+ dyn_cast<ConstantFP>(II.getOperand(ImageDimIntr->LodIndex))) {
+ if (ConstantLod->isZero() || ConstantLod->isNegative()) {
+ const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
+ AMDGPU::getImageDimIntrinsicByBaseOpcode(LZMappingInfo->LZ,
+ ImageDimIntr->Dim);
+ return modifyIntrinsicCall(
+ II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
+ Args.erase(Args.begin() + ImageDimIntr->LodIndex);
+ });
+ }
+ }
+ }
+
+ // Optimize _mip away, when 'lod' is zero
+ if (const auto *MIPMappingInfo =
+ AMDGPU::getMIMGMIPMappingInfo(ImageDimIntr->BaseOpcode)) {
+ if (auto *ConstantMip =
+ dyn_cast<ConstantInt>(II.getOperand(ImageDimIntr->MipIndex))) {
+ if (ConstantMip->isZero()) {
+ const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
+ AMDGPU::getImageDimIntrinsicByBaseOpcode(MIPMappingInfo->NONMIP,
+ ImageDimIntr->Dim);
+ return modifyIntrinsicCall(
+ II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
+ Args.erase(Args.begin() + ImageDimIntr->MipIndex);
+ });
+ }
+ }
+ }
+
+ // Optimize _bias away when 'bias' is zero
+ if (const auto *BiasMappingInfo =
+ AMDGPU::getMIMGBiasMappingInfo(ImageDimIntr->BaseOpcode)) {
+ if (auto *ConstantBias =
+ dyn_cast<ConstantFP>(II.getOperand(ImageDimIntr->BiasIndex))) {
+ if (ConstantBias->isZero()) {
+ const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
+ AMDGPU::getImageDimIntrinsicByBaseOpcode(BiasMappingInfo->NoBias,
+ ImageDimIntr->Dim);
+ return modifyIntrinsicCall(
+ II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
+ Args.erase(Args.begin() + ImageDimIntr->BiasIndex);
+ ArgTys.erase(ArgTys.begin() + ImageDimIntr->BiasTyArg);
+ });
+ }
+ }
+ }
+
+ // Try to use A16 or G16
if (!ST->hasA16() && !ST->hasG16())
return None;
@@ -144,43 +226,31 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
Type *CoordType = FloatCoord ? Type::getHalfTy(II.getContext())
: Type::getInt16Ty(II.getContext());
- SmallVector<Type *, 4> ArgTys;
- if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), ArgTys))
- return None;
-
- ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
- if (!OnlyDerivatives) {
- ArgTys[ImageDimIntr->CoordTyArg] = CoordType;
-
- // Change the bias type
- if (ImageDimIntr->NumBiasArgs != 0)
- ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext());
- }
- Function *I =
- Intrinsic::getDeclaration(II.getModule(), II.getIntrinsicID(), ArgTys);
-
- SmallVector<Value *, 8> Args(II.args());
+ return modifyIntrinsicCall(
+ II, II.getIntrinsicID(), IC, [&](auto &Args, auto &ArgTys) {
+ ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
+ if (!OnlyDerivatives) {
+ ArgTys[ImageDimIntr->CoordTyArg] = CoordType;
- unsigned EndIndex =
- OnlyDerivatives ? ImageDimIntr->CoordStart : ImageDimIntr->VAddrEnd;
- for (unsigned OperandIndex = ImageDimIntr->GradientStart;
- OperandIndex < EndIndex; OperandIndex++) {
- Args[OperandIndex] =
- convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder);
- }
+ // Change the bias type
+ if (ImageDimIntr->NumBiasArgs != 0)
+ ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext());
+ }
- // Convert the bias
- if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
- Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
- Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder);
- }
+ unsigned EndIndex =
+ OnlyDerivatives ? ImageDimIntr->CoordStart : ImageDimIntr->VAddrEnd;
+ for (unsigned OperandIndex = ImageDimIntr->GradientStart;
+ OperandIndex < EndIndex; OperandIndex++) {
+ Args[OperandIndex] =
+ convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder);
+ }
- CallInst *NewCall = IC.Builder.CreateCall(I, Args);
- NewCall->takeName(&II);
- NewCall->copyMetadata(II);
- if (isa<FPMathOperator>(NewCall))
- NewCall->copyFastMathFlags(&II);
- return IC.replaceInstUsesWith(II, NewCall);
+ // Convert the bias
+ if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
+ Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
+ Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder);
+ }
+ });
}
bool GCNTTIImpl::canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 1d8a558..49eaa14 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -131,6 +131,22 @@ def MIMGMIPMappingTable : GenericTable {
let PrimaryKeyName = "getMIMGMIPMappingInfo";
}
+class MIMGBiasMapping<MIMGBaseOpcode bias, MIMGBaseOpcode nobias> {
+ MIMGBaseOpcode Bias = bias;
+ MIMGBaseOpcode NoBias = nobias;
+}
+
+def MIMGBiasMappingTable : GenericTable {
+ let FilterClass = "MIMGBiasMapping";
+ let CppTypeName = "MIMGBiasMappingInfo";
+ let Fields = ["Bias", "NoBias"];
+ string TypeOf_Bias = "MIMGBaseOpcode";
+ string TypeOf_NoBias = "MIMGBaseOpcode";
+
+ let PrimaryKey = ["Bias"];
+ let PrimaryKeyName = "getMIMGBiasMappingInfo";
+}
+
class MIMGG16Mapping<MIMGBaseOpcode g, MIMGBaseOpcode g16> {
MIMGBaseOpcode G = g;
MIMGBaseOpcode G16 = g16;
@@ -1140,6 +1156,24 @@ def : MIMGLZMapping<IMAGE_GATHER4_C_L_O, IMAGE_GATHER4_C_LZ_O>;
def : MIMGMIPMapping<IMAGE_LOAD_MIP, IMAGE_LOAD>;
def : MIMGMIPMapping<IMAGE_STORE_MIP, IMAGE_STORE>;
+// Bias to NoBias Optimization Mapping
+def : MIMGBiasMapping<IMAGE_SAMPLE_B, IMAGE_SAMPLE>;
+def : MIMGBiasMapping<IMAGE_SAMPLE_B_CL, IMAGE_SAMPLE_CL>;
+def : MIMGBiasMapping<IMAGE_SAMPLE_C_B, IMAGE_SAMPLE_C>;
+def : MIMGBiasMapping<IMAGE_SAMPLE_C_B_CL, IMAGE_SAMPLE_C_CL>;
+def : MIMGBiasMapping<IMAGE_SAMPLE_B_O, IMAGE_SAMPLE_O>;
+def : MIMGBiasMapping<IMAGE_SAMPLE_B_CL_O, IMAGE_SAMPLE_CL_O>;
+def : MIMGBiasMapping<IMAGE_SAMPLE_C_B_O, IMAGE_SAMPLE_C_O>;
+def : MIMGBiasMapping<IMAGE_SAMPLE_C_B_CL_O, IMAGE_SAMPLE_C_CL_O>;
+def : MIMGBiasMapping<IMAGE_GATHER4_B, IMAGE_GATHER4>;
+def : MIMGBiasMapping<IMAGE_GATHER4_B_CL, IMAGE_GATHER4_CL>;
+def : MIMGBiasMapping<IMAGE_GATHER4_C_B, IMAGE_GATHER4_C>;
+def : MIMGBiasMapping<IMAGE_GATHER4_C_B_CL, IMAGE_GATHER4_C_CL>;
+def : MIMGBiasMapping<IMAGE_GATHER4_B_O, IMAGE_GATHER4_O>;
+def : MIMGBiasMapping<IMAGE_GATHER4_B_CL_O, IMAGE_GATHER4_CL_O>;
+def : MIMGBiasMapping<IMAGE_GATHER4_C_B_O, IMAGE_GATHER4_C_O>;
+def : MIMGBiasMapping<IMAGE_GATHER4_C_B_CL_O, IMAGE_GATHER4_C_CL_O>;
+
// G to G16 Optimization Mapping
def : MIMGG16Mapping<IMAGE_SAMPLE_D, IMAGE_SAMPLE_D_G16>;
def : MIMGG16Mapping<IMAGE_SAMPLE_D_CL, IMAGE_SAMPLE_D_CL_G16>;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 6c7d73a..fa1fa5b 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -132,6 +132,7 @@ bool isHsaAbiVersion3Or4(const MCSubtargetInfo *STI) {
#define GET_MIMGInfoTable_IMPL
#define GET_MIMGLZMappingTable_IMPL
#define GET_MIMGMIPMappingTable_IMPL
+#define GET_MIMGBiasMappingTable_IMPL
#define GET_MIMGG16MappingTable_IMPL
#include "AMDGPUGenSearchableTables.inc"
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 061c74c..cabae3d 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -64,6 +64,7 @@ struct GcnBufferFormatInfo {
#define GET_MIMGEncoding_DECL
#define GET_MIMGLZMapping_DECL
#define GET_MIMGMIPMapping_DECL
+#define GET_MIMGBiasMapping_DECL
#include "AMDGPUGenSearchableTables.inc"
namespace IsaInfo {
@@ -330,6 +331,11 @@ struct MIMGMIPMappingInfo {
MIMGBaseOpcode NONMIP;
};
+struct MIMGBiasMappingInfo {
+ MIMGBaseOpcode Bias;
+ MIMGBaseOpcode NoBias;
+};
+
struct MIMGG16MappingInfo {
MIMGBaseOpcode G;
MIMGBaseOpcode G16;
@@ -342,6 +348,9 @@ LLVM_READONLY
const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);
LLVM_READONLY
+const MIMGBiasMappingInfo *getMIMGBiasMappingInfo(unsigned Bias);
+
+LLVM_READONLY
const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);
LLVM_READONLY