diff options
author | Sebastian Neubauer <Sebastian.Neubauer@amd.com> | 2021-12-21 17:27:14 +0100 |
---|---|---|
committer | Sebastian Neubauer <Sebastian.Neubauer@amd.com> | 2022-01-21 12:09:07 +0100 |
commit | 603d18033c510c99ad84f26b6603db1ca68a500f (patch) | |
tree | 6595a11948adab5311b561b689bc473cb9cc482a /llvm/lib | |
parent | 0530fdbbbb84ea3024a4a8f7156ff716f00ffd48 (diff) | |
download | llvm-603d18033c510c99ad84f26b6603db1ca68a500f.zip llvm-603d18033c510c99ad84f26b6603db1ca68a500f.tar.gz llvm-603d18033c510c99ad84f26b6603db1ca68a500f.tar.bz2 |
[AMDGPU][InstCombine] Remove zero LOD bias
If the bias is zero, we can remove it from the image instruction.
Also copy other image optimizations (l->lz, mip->nomip) to IR combines.
Differential Revision: https://reviews.llvm.org/D116042
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp | 138 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/MIMGInstructions.td | 34 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 1 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 9 |
4 files changed, 148 insertions, 34 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp index 5eb7cf8..84363d3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp @@ -97,10 +97,92 @@ static Value *convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder) { llvm_unreachable("Should never be called!"); } +/// Applies Function(II.Args, II.ArgTys) and replaces the intrinsic call with +/// the modified arguments. +static Optional<Instruction *> modifyIntrinsicCall( + IntrinsicInst &II, unsigned NewIntr, InstCombiner &IC, + std::function<void(SmallVectorImpl<Value *> &, SmallVectorImpl<Type *> &)> + Func) { + SmallVector<Type *, 4> ArgTys; + if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), ArgTys)) + return None; + + SmallVector<Value *, 8> Args(II.args()); + + // Modify arguments and types + Func(Args, ArgTys); + + Function *I = Intrinsic::getDeclaration(II.getModule(), NewIntr, ArgTys); + + CallInst *NewCall = IC.Builder.CreateCall(I, Args); + NewCall->takeName(&II); + NewCall->copyMetadata(II); + if (isa<FPMathOperator>(NewCall)) + NewCall->copyFastMathFlags(&II); + + // Erase and replace uses + if (!II.getType()->isVoidTy()) + IC.replaceInstUsesWith(II, NewCall); + return IC.eraseInstFromFunction(II); +} + static Optional<Instruction *> simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST, const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr, IntrinsicInst &II, InstCombiner &IC) { + // Optimize _L to _LZ when _L is zero + if (const auto *LZMappingInfo = + AMDGPU::getMIMGLZMappingInfo(ImageDimIntr->BaseOpcode)) { + if (auto *ConstantLod = + dyn_cast<ConstantFP>(II.getOperand(ImageDimIntr->LodIndex))) { + if (ConstantLod->isZero() || ConstantLod->isNegative()) { + const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr = + AMDGPU::getImageDimIntrinsicByBaseOpcode(LZMappingInfo->LZ, + ImageDimIntr->Dim); + return modifyIntrinsicCall( + II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) { + Args.erase(Args.begin() + ImageDimIntr->LodIndex); + }); + } + } + } + + // Optimize _mip away, when 'lod' is zero + if (const auto *MIPMappingInfo = + AMDGPU::getMIMGMIPMappingInfo(ImageDimIntr->BaseOpcode)) { + if (auto *ConstantMip = + dyn_cast<ConstantInt>(II.getOperand(ImageDimIntr->MipIndex))) { + if (ConstantMip->isZero()) { + const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr = + AMDGPU::getImageDimIntrinsicByBaseOpcode(MIPMappingInfo->NONMIP, + ImageDimIntr->Dim); + return modifyIntrinsicCall( + II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) { + Args.erase(Args.begin() + ImageDimIntr->MipIndex); + }); + } + } + } + + // Optimize _bias away when 'bias' is zero + if (const auto *BiasMappingInfo = + AMDGPU::getMIMGBiasMappingInfo(ImageDimIntr->BaseOpcode)) { + if (auto *ConstantBias = + dyn_cast<ConstantFP>(II.getOperand(ImageDimIntr->BiasIndex))) { + if (ConstantBias->isZero()) { + const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr = + AMDGPU::getImageDimIntrinsicByBaseOpcode(BiasMappingInfo->NoBias, + ImageDimIntr->Dim); + return modifyIntrinsicCall( + II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) { + Args.erase(Args.begin() + ImageDimIntr->BiasIndex); + ArgTys.erase(ArgTys.begin() + ImageDimIntr->BiasTyArg); + }); + } + } + } + + // Try to use A16 or G16 if (!ST->hasA16() && !ST->hasG16()) return None; @@ -144,43 +226,31 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST, Type *CoordType = FloatCoord ? Type::getHalfTy(II.getContext()) : Type::getInt16Ty(II.getContext()); - SmallVector<Type *, 4> ArgTys; - if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), ArgTys)) - return None; - - ArgTys[ImageDimIntr->GradientTyArg] = CoordType; - if (!OnlyDerivatives) { - ArgTys[ImageDimIntr->CoordTyArg] = CoordType; - - // Change the bias type - if (ImageDimIntr->NumBiasArgs != 0) - ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext()); - } - Function *I = - Intrinsic::getDeclaration(II.getModule(), II.getIntrinsicID(), ArgTys); - - SmallVector<Value *, 8> Args(II.args()); + return modifyIntrinsicCall( + II, II.getIntrinsicID(), IC, [&](auto &Args, auto &ArgTys) { + ArgTys[ImageDimIntr->GradientTyArg] = CoordType; + if (!OnlyDerivatives) { + ArgTys[ImageDimIntr->CoordTyArg] = CoordType; - unsigned EndIndex = - OnlyDerivatives ? ImageDimIntr->CoordStart : ImageDimIntr->VAddrEnd; - for (unsigned OperandIndex = ImageDimIntr->GradientStart; - OperandIndex < EndIndex; OperandIndex++) { - Args[OperandIndex] = - convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder); - } + // Change the bias type + if (ImageDimIntr->NumBiasArgs != 0) + ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext()); + } - // Convert the bias - if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) { - Value *Bias = II.getOperand(ImageDimIntr->BiasIndex); - Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder); - } + unsigned EndIndex = + OnlyDerivatives ? ImageDimIntr->CoordStart : ImageDimIntr->VAddrEnd; + for (unsigned OperandIndex = ImageDimIntr->GradientStart; + OperandIndex < EndIndex; OperandIndex++) { + Args[OperandIndex] = + convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder); + } - CallInst *NewCall = IC.Builder.CreateCall(I, Args); - NewCall->takeName(&II); - NewCall->copyMetadata(II); - if (isa<FPMathOperator>(NewCall)) - NewCall->copyFastMathFlags(&II); - return IC.replaceInstUsesWith(II, NewCall); + // Convert the bias + if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) { + Value *Bias = II.getOperand(ImageDimIntr->BiasIndex); + Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder); + } + }); } bool GCNTTIImpl::canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1, diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td index 1d8a558..49eaa14 100644 --- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -131,6 +131,22 @@ def MIMGMIPMappingTable : GenericTable { let PrimaryKeyName = "getMIMGMIPMappingInfo"; } +class MIMGBiasMapping<MIMGBaseOpcode bias, MIMGBaseOpcode nobias> { + MIMGBaseOpcode Bias = bias; + MIMGBaseOpcode NoBias = nobias; +} + +def MIMGBiasMappingTable : GenericTable { + let FilterClass = "MIMGBiasMapping"; + let CppTypeName = "MIMGBiasMappingInfo"; + let Fields = ["Bias", "NoBias"]; + string TypeOf_Bias = "MIMGBaseOpcode"; + string TypeOf_NoBias = "MIMGBaseOpcode"; + + let PrimaryKey = ["Bias"]; + let PrimaryKeyName = "getMIMGBiasMappingInfo"; +} + class MIMGG16Mapping<MIMGBaseOpcode g, MIMGBaseOpcode g16> { MIMGBaseOpcode G = g; MIMGBaseOpcode G16 = g16; @@ -1140,6 +1156,24 @@ def : MIMGLZMapping<IMAGE_GATHER4_C_L_O, IMAGE_GATHER4_C_LZ_O>; def : MIMGMIPMapping<IMAGE_LOAD_MIP, IMAGE_LOAD>; def : MIMGMIPMapping<IMAGE_STORE_MIP, IMAGE_STORE>; +// Bias to NoBias Optimization Mapping +def : MIMGBiasMapping<IMAGE_SAMPLE_B, IMAGE_SAMPLE>; +def : MIMGBiasMapping<IMAGE_SAMPLE_B_CL, IMAGE_SAMPLE_CL>; +def : MIMGBiasMapping<IMAGE_SAMPLE_C_B, IMAGE_SAMPLE_C>; +def : MIMGBiasMapping<IMAGE_SAMPLE_C_B_CL, IMAGE_SAMPLE_C_CL>; +def : MIMGBiasMapping<IMAGE_SAMPLE_B_O, IMAGE_SAMPLE_O>; +def : MIMGBiasMapping<IMAGE_SAMPLE_B_CL_O, IMAGE_SAMPLE_CL_O>; +def : MIMGBiasMapping<IMAGE_SAMPLE_C_B_O, IMAGE_SAMPLE_C_O>; +def : MIMGBiasMapping<IMAGE_SAMPLE_C_B_CL_O, IMAGE_SAMPLE_C_CL_O>; +def : MIMGBiasMapping<IMAGE_GATHER4_B, IMAGE_GATHER4>; +def : MIMGBiasMapping<IMAGE_GATHER4_B_CL, IMAGE_GATHER4_CL>; +def : MIMGBiasMapping<IMAGE_GATHER4_C_B, IMAGE_GATHER4_C>; +def : MIMGBiasMapping<IMAGE_GATHER4_C_B_CL, IMAGE_GATHER4_C_CL>; +def : MIMGBiasMapping<IMAGE_GATHER4_B_O, IMAGE_GATHER4_O>; +def : MIMGBiasMapping<IMAGE_GATHER4_B_CL_O, IMAGE_GATHER4_CL_O>; +def : MIMGBiasMapping<IMAGE_GATHER4_C_B_O, IMAGE_GATHER4_C_O>; +def : MIMGBiasMapping<IMAGE_GATHER4_C_B_CL_O, IMAGE_GATHER4_C_CL_O>; + // G to G16 Optimization Mapping def : MIMGG16Mapping<IMAGE_SAMPLE_D, IMAGE_SAMPLE_D_G16>; def : MIMGG16Mapping<IMAGE_SAMPLE_D_CL, IMAGE_SAMPLE_D_CL_G16>; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 6c7d73a..fa1fa5b 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -132,6 +132,7 @@ bool isHsaAbiVersion3Or4(const MCSubtargetInfo *STI) { #define GET_MIMGInfoTable_IMPL #define GET_MIMGLZMappingTable_IMPL #define GET_MIMGMIPMappingTable_IMPL +#define GET_MIMGBiasMappingTable_IMPL #define GET_MIMGG16MappingTable_IMPL #include "AMDGPUGenSearchableTables.inc" diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 061c74c..cabae3d 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -64,6 +64,7 @@ struct GcnBufferFormatInfo { #define GET_MIMGEncoding_DECL #define GET_MIMGLZMapping_DECL #define GET_MIMGMIPMapping_DECL +#define GET_MIMGBiASMapping_DECL #include "AMDGPUGenSearchableTables.inc" namespace IsaInfo { @@ -330,6 +331,11 @@ struct MIMGMIPMappingInfo { MIMGBaseOpcode NONMIP; }; +struct MIMGBiasMappingInfo { + MIMGBaseOpcode Bias; + MIMGBaseOpcode NoBias; +}; + struct MIMGG16MappingInfo { MIMGBaseOpcode G; MIMGBaseOpcode G16; @@ -342,6 +348,9 @@ LLVM_READONLY const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP); LLVM_READONLY +const MIMGBiasMappingInfo *getMIMGBiasMappingInfo(unsigned Bias); + +LLVM_READONLY const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G); LLVM_READONLY |