From 603d18033c510c99ad84f26b6603db1ca68a500f Mon Sep 17 00:00:00 2001
From: Sebastian Neubauer
Date: Tue, 21 Dec 2021 17:27:14 +0100
Subject: [AMDGPU][InstCombine] Remove zero LOD bias

If the bias is zero, we can remove it from the image instruction.

Also copy other image optimizations (l->lz, mip->nomip) to IR combines.

Differential Revision: https://reviews.llvm.org/D116042
---
 .../Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp   | 138 ++++++++++++++++-----
 llvm/lib/Target/AMDGPU/MIMGInstructions.td         |  34 +++++
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp    |   1 +
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h      |   9 ++
 4 files changed, 148 insertions(+), 34 deletions(-)

(limited to 'llvm/lib')

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 5eb7cf8..84363d3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -97,10 +97,92 @@ static Value *convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder) {
   llvm_unreachable("Should never be called!");
 }
 
+/// Applies Function(II.Args, II.ArgTys) and replaces the intrinsic call with
+/// the modified arguments.
+static Optional<Instruction *> modifyIntrinsicCall(
+    IntrinsicInst &II, unsigned NewIntr, InstCombiner &IC,
+    std::function<void(SmallVectorImpl<Value *> &, SmallVectorImpl<Type *> &)>
+        Func) {
+  SmallVector<Type *, 4> ArgTys;
+  if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), ArgTys))
+    return None;
+
+  SmallVector<Value *, 8> Args(II.args());
+
+  // Modify arguments and types
+  Func(Args, ArgTys);
+
+  Function *I = Intrinsic::getDeclaration(II.getModule(), NewIntr, ArgTys);
+
+  CallInst *NewCall = IC.Builder.CreateCall(I, Args);
+  NewCall->takeName(&II);
+  NewCall->copyMetadata(II);
+  if (isa<FPMathOperator>(NewCall))
+    NewCall->copyFastMathFlags(&II);
+
+  // Erase and replace uses
+  if (!II.getType()->isVoidTy())
+    IC.replaceInstUsesWith(II, NewCall);
+  return IC.eraseInstFromFunction(II);
+}
+
 static Optional<Instruction *>
 simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
                              const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr,
                              IntrinsicInst &II, InstCombiner &IC) {
+  // Optimize _L to _LZ when 'lod' is zero or negative
+  if (const auto *LZMappingInfo =
+          AMDGPU::getMIMGLZMappingInfo(ImageDimIntr->BaseOpcode)) {
+    if (auto *ConstantLod =
+            dyn_cast<ConstantFP>(II.getOperand(ImageDimIntr->LodIndex))) {
+      if (ConstantLod->isZero() || ConstantLod->isNegative()) {
+        const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
+            AMDGPU::getImageDimIntrinsicByBaseOpcode(LZMappingInfo->LZ,
+                                                     ImageDimIntr->Dim);
+        return modifyIntrinsicCall(
+            II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
+              Args.erase(Args.begin() + ImageDimIntr->LodIndex);
+            });
+      }
+    }
+  }
+
+  // Optimize _mip away, when 'lod' is zero
+  if (const auto *MIPMappingInfo =
+          AMDGPU::getMIMGMIPMappingInfo(ImageDimIntr->BaseOpcode)) {
+    if (auto *ConstantMip =
+            dyn_cast<ConstantInt>(II.getOperand(ImageDimIntr->MipIndex))) {
+      if (ConstantMip->isZero()) {
+        const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
+            AMDGPU::getImageDimIntrinsicByBaseOpcode(MIPMappingInfo->NONMIP,
+                                                     ImageDimIntr->Dim);
+        return modifyIntrinsicCall(
+            II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
+              Args.erase(Args.begin() + ImageDimIntr->MipIndex);
+            });
+      }
+    }
+  }
+
+  // Optimize _bias away when 'bias' is zero
+  if (const auto *BiasMappingInfo =
+          AMDGPU::getMIMGBiasMappingInfo(ImageDimIntr->BaseOpcode)) {
+    if (auto *ConstantBias =
+            dyn_cast<ConstantFP>(II.getOperand(ImageDimIntr->BiasIndex))) {
+      if (ConstantBias->isZero()) {
+        const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
+            AMDGPU::getImageDimIntrinsicByBaseOpcode(BiasMappingInfo->NoBias,
+                                                     ImageDimIntr->Dim);
+        return modifyIntrinsicCall(
+            II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
+              Args.erase(Args.begin() + ImageDimIntr->BiasIndex);
+              ArgTys.erase(ArgTys.begin() + ImageDimIntr->BiasTyArg);
+            });
+      }
+    }
+  }
+
+  // Try to use A16 or G16
   if (!ST->hasA16() && !ST->hasG16())
     return None;
 
@@ -144,43 +226,31 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
   Type *CoordType = FloatCoord ? Type::getHalfTy(II.getContext())
                                : Type::getInt16Ty(II.getContext());
 
-  SmallVector<Type *, 4> ArgTys;
-  if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), ArgTys))
-    return None;
-
-  ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
-  if (!OnlyDerivatives) {
-    ArgTys[ImageDimIntr->CoordTyArg] = CoordType;
-
-    // Change the bias type
-    if (ImageDimIntr->NumBiasArgs != 0)
-      ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext());
-  }
-  Function *I =
-      Intrinsic::getDeclaration(II.getModule(), II.getIntrinsicID(), ArgTys);
-
-  SmallVector<Value *, 8> Args(II.args());
+  return modifyIntrinsicCall(
+      II, II.getIntrinsicID(), IC, [&](auto &Args, auto &ArgTys) {
+        ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
+        if (!OnlyDerivatives) {
+          ArgTys[ImageDimIntr->CoordTyArg] = CoordType;
 
-  unsigned EndIndex =
-      OnlyDerivatives ? ImageDimIntr->CoordStart : ImageDimIntr->VAddrEnd;
-  for (unsigned OperandIndex = ImageDimIntr->GradientStart;
-       OperandIndex < EndIndex; OperandIndex++) {
-    Args[OperandIndex] =
-        convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder);
-  }
+          // Change the bias type
+          if (ImageDimIntr->NumBiasArgs != 0)
+            ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext());
+        }
 
-  // Convert the bias
-  if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
-    Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
-    Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder);
-  }
+        unsigned EndIndex =
+            OnlyDerivatives ? ImageDimIntr->CoordStart : ImageDimIntr->VAddrEnd;
+        for (unsigned OperandIndex = ImageDimIntr->GradientStart;
+             OperandIndex < EndIndex; OperandIndex++) {
+          Args[OperandIndex] =
+              convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder);
+        }
 
-  CallInst *NewCall = IC.Builder.CreateCall(I, Args);
-  NewCall->takeName(&II);
-  NewCall->copyMetadata(II);
-  if (isa<FPMathOperator>(NewCall))
-    NewCall->copyFastMathFlags(&II);
-  return IC.replaceInstUsesWith(II, NewCall);
+        // Convert the bias
+        if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
+          Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
+          Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder);
+        }
+      });
 }
 
 bool GCNTTIImpl::canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 1d8a558..49eaa14 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -131,6 +131,22 @@ def MIMGMIPMappingTable : GenericTable {
   let PrimaryKeyName = "getMIMGMIPMappingInfo";
 }
 
+class MIMGBiasMapping<MIMGBaseOpcode bias, MIMGBaseOpcode nobias> {
+  MIMGBaseOpcode Bias = bias;
+  MIMGBaseOpcode NoBias = nobias;
+}
+
+def MIMGBiasMappingTable : GenericTable {
+  let FilterClass = "MIMGBiasMapping";
+  let CppTypeName = "MIMGBiasMappingInfo";
+  let Fields = ["Bias", "NoBias"];
+  string TypeOf_Bias = "MIMGBaseOpcode";
+  string TypeOf_NoBias = "MIMGBaseOpcode";
+
+  let PrimaryKey = ["Bias"];
+  let PrimaryKeyName = "getMIMGBiasMappingInfo";
+}
+
 class MIMGG16Mapping<MIMGBaseOpcode g, MIMGBaseOpcode g16> {
   MIMGBaseOpcode G = g;
   MIMGBaseOpcode G16 = g16;
@@ -1140,6 +1156,24 @@ def : MIMGLZMapping<IMAGE_GATHER4_C_L_O, IMAGE_GATHER4_C_LZ_O>;
 def : MIMGMIPMapping<IMAGE_LOAD_MIP, IMAGE_LOAD>;
 def : MIMGMIPMapping<IMAGE_STORE_MIP, IMAGE_STORE>;
 
+// Bias to NoBias Optimization Mapping
+def : MIMGBiasMapping<IMAGE_SAMPLE_B, IMAGE_SAMPLE>;
+def : MIMGBiasMapping<IMAGE_SAMPLE_B_CL, IMAGE_SAMPLE_CL>;
+def : MIMGBiasMapping<IMAGE_SAMPLE_C_B, IMAGE_SAMPLE_C>;
+def : MIMGBiasMapping<IMAGE_SAMPLE_C_B_CL, IMAGE_SAMPLE_C_CL>;
+def : MIMGBiasMapping<IMAGE_SAMPLE_B_O, IMAGE_SAMPLE_O>;
+def : MIMGBiasMapping<IMAGE_SAMPLE_B_CL_O, IMAGE_SAMPLE_CL_O>;
+def : MIMGBiasMapping<IMAGE_SAMPLE_C_B_O, IMAGE_SAMPLE_C_O>;
+def : MIMGBiasMapping<IMAGE_SAMPLE_C_B_CL_O, IMAGE_SAMPLE_C_CL_O>;
+def : MIMGBiasMapping<IMAGE_GATHER4_B, IMAGE_GATHER4>;
+def : MIMGBiasMapping<IMAGE_GATHER4_B_CL, IMAGE_GATHER4_CL>;
+def : MIMGBiasMapping<IMAGE_GATHER4_C_B, IMAGE_GATHER4_C>;
+def : MIMGBiasMapping<IMAGE_GATHER4_C_B_CL, IMAGE_GATHER4_C_CL>;
+def : MIMGBiasMapping<IMAGE_GATHER4_B_O, IMAGE_GATHER4_O>;
+def : MIMGBiasMapping<IMAGE_GATHER4_B_CL_O, IMAGE_GATHER4_CL_O>;
+def : MIMGBiasMapping<IMAGE_GATHER4_C_B_O, IMAGE_GATHER4_C_O>;
+def : MIMGBiasMapping<IMAGE_GATHER4_C_B_CL_O, IMAGE_GATHER4_C_CL_O>;
+
 // G to G16 Optimization Mapping
 def : MIMGG16Mapping<IMAGE_SAMPLE_D, IMAGE_SAMPLE_D_G16>;
 def : MIMGG16Mapping<IMAGE_SAMPLE_D_CL, IMAGE_SAMPLE_D_CL_G16>;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 6c7d73a..fa1fa5b 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -132,6 +132,7 @@ bool isHsaAbiVersion3Or4(const MCSubtargetInfo *STI) {
 #define GET_MIMGInfoTable_IMPL
 #define GET_MIMGLZMappingTable_IMPL
 #define GET_MIMGMIPMappingTable_IMPL
+#define GET_MIMGBiasMappingTable_IMPL
 #define GET_MIMGG16MappingTable_IMPL
 #include "AMDGPUGenSearchableTables.inc"
 
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 061c74c..cabae3d 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -64,6 +64,7 @@ struct GcnBufferFormatInfo {
 #define GET_MIMGEncoding_DECL
 #define GET_MIMGLZMapping_DECL
 #define GET_MIMGMIPMapping_DECL
+#define GET_MIMGBiasMapping_DECL
 #include "AMDGPUGenSearchableTables.inc"
 
 namespace IsaInfo {
@@ -330,6 +331,11 @@ struct MIMGMIPMappingInfo {
   MIMGBaseOpcode NONMIP;
 };
 
+struct MIMGBiasMappingInfo {
+  MIMGBaseOpcode Bias;
+  MIMGBaseOpcode NoBias;
+};
+
 struct MIMGG16MappingInfo {
   MIMGBaseOpcode G;
   MIMGBaseOpcode G16;
@@ -342,6 +348,9 @@ LLVM_READONLY
 const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);
 
 LLVM_READONLY
+const MIMGBiasMappingInfo *getMIMGBiasMappingInfo(unsigned Bias);
+
+LLVM_READONLY
 const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);
 
 LLVM_READONLY
-- 
cgit v1.1