Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUUniformIntrinsicCombine.cpp')
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUUniformIntrinsicCombine.cpp  17
1 file changed, 4 insertions, 13 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUUniformIntrinsicCombine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUUniformIntrinsicCombine.cpp
index 65e6ed9..c52eb4e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUUniformIntrinsicCombine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUUniformIntrinsicCombine.cpp
@@ -57,10 +57,11 @@ static bool optimizeUniformIntrinsic(IntrinsicInst &II,
const UniformityInfo &UI,
ValueMap<const Value *, bool> &Tracker) {
llvm::Intrinsic::ID IID = II.getIntrinsicID();
-
+ /// We deliberately do not simplify readfirstlane with a uniform argument, so
+ /// that frontends can use it to force a copy to SGPR and thereby prevent the
+ /// backend from generating unwanted waterfall loops.
switch (IID) {
case Intrinsic::amdgcn_permlane64:
- case Intrinsic::amdgcn_readfirstlane:
case Intrinsic::amdgcn_readlane: {
Value *Src = II.getArgOperand(0);
if (isDivergentUseWithNew(II.getOperandUse(0), UI, Tracker))
@@ -107,7 +108,7 @@ static bool optimizeUniformIntrinsic(IntrinsicInst &II,
return Changed;
}
default:
- llvm_unreachable("Unexpected intrinsic ID in optimizeUniformIntrinsic");
+ return false;
}
return false;
}
@@ -121,16 +122,6 @@ static bool runUniformIntrinsicCombine(Function &F, const UniformityInfo &UI) {
auto *II = dyn_cast<IntrinsicInst>(&I);
if (!II)
continue;
-
- switch (II->getIntrinsicID()) {
- case Intrinsic::amdgcn_permlane64:
- case Intrinsic::amdgcn_readfirstlane:
- case Intrinsic::amdgcn_readlane:
- case Intrinsic::amdgcn_ballot:
- break;
- default:
- continue;
- }
IsChanged |= optimizeUniformIntrinsic(*II, UI, Tracker);
}
return IsChanged;
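The two hunks in runUniformIntrinsicCombine and the default-case change go together: once the caller no longer pre-filters intrinsic IDs, every IntrinsicInst in the function reaches optimizeUniformIntrinsic, so its default case must report "no change" (return false) instead of hitting llvm_unreachable. The following is a minimal, self-contained mock of that dispatch shape, illustrative only; the names (optimizeUniform, the IntrinsicID enum) are stand-ins, not the actual LLVM code.

// Illustrative mock (not LLVM code): models the dispatch shape after this
// patch, where the caller does not pre-filter intrinsic IDs and the handler's
// default case reports "no change" rather than asserting.
#include <cstdio>
#include <vector>

enum class IntrinsicID { Permlane64, Readfirstlane, Readlane, Ballot, Other };

// Stand-in for optimizeUniformIntrinsic: handled IDs may rewrite the
// instruction; everything else (including readfirstlane, now deliberately
// left alone) simply returns false.
static bool optimizeUniform(IntrinsicID IID) {
  switch (IID) {
  case IntrinsicID::Permlane64:
  case IntrinsicID::Readlane:
  case IntrinsicID::Ballot:
    return true; // pretend the uniform operand let us simplify
  default:
    return false; // unhandled or intentionally preserved: no change
  }
}

int main() {
  std::vector<IntrinsicID> Insts = {IntrinsicID::Readfirstlane,
                                    IntrinsicID::Ballot, IntrinsicID::Other};
  bool Changed = false;
  for (IntrinsicID IID : Insts)
    Changed |= optimizeUniform(IID); // no pre-filter switch needed any more
  std::printf("changed = %d\n", Changed);
}

Leaving readfirstlane out of the handled cases matches the comment added in the first hunk: a readfirstlane with a uniform argument is kept as-is so frontends can use it to force a copy to SGPR and avoid unwanted waterfall loops.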