diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUUniformIntrinsicCombine.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUUniformIntrinsicCombine.cpp | 17 |
1 files changed, 4 insertions, 13 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUUniformIntrinsicCombine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUUniformIntrinsicCombine.cpp index 65e6ed9..c52eb4e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUUniformIntrinsicCombine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUUniformIntrinsicCombine.cpp @@ -57,10 +57,11 @@ static bool optimizeUniformIntrinsic(IntrinsicInst &II, const UniformityInfo &UI, ValueMap<const Value *, bool> &Tracker) { llvm::Intrinsic::ID IID = II.getIntrinsicID(); - + /// We deliberately do not simplify readfirstlane with a uniform argument, so + /// that frontends can use it to force a copy to SGPR and thereby prevent the + /// backend from generating unwanted waterfall loops. switch (IID) { case Intrinsic::amdgcn_permlane64: - case Intrinsic::amdgcn_readfirstlane: case Intrinsic::amdgcn_readlane: { Value *Src = II.getArgOperand(0); if (isDivergentUseWithNew(II.getOperandUse(0), UI, Tracker)) @@ -107,7 +108,7 @@ static bool optimizeUniformIntrinsic(IntrinsicInst &II, return Changed; } default: - llvm_unreachable("Unexpected intrinsic ID in optimizeUniformIntrinsic"); + return false; } return false; } @@ -121,16 +122,6 @@ static bool runUniformIntrinsicCombine(Function &F, const UniformityInfo &UI) { auto *II = dyn_cast<IntrinsicInst>(&I); if (!II) continue; - - switch (II->getIntrinsicID()) { - case Intrinsic::amdgcn_permlane64: - case Intrinsic::amdgcn_readfirstlane: - case Intrinsic::amdgcn_readlane: - case Intrinsic::amdgcn_ballot: - break; - default: - continue; - } IsChanged |= optimizeUniformIntrinsic(*II, UI, Tracker); } return IsChanged; |
