diff options
author | Mirko Brkusanin <Mirko.Brkusanin@amd.com> | 2020-09-29 16:35:42 +0200 |
---|---|---|
committer | Mirko Brkusanin <Mirko.Brkusanin@amd.com> | 2020-09-30 11:09:18 +0200 |
commit | 0249df33fec16b728e2d33cae02f5da4c9f74e38 (patch) | |
tree | c56a5a38c95fc6b9e4986d81a18486e0e8a9ea8c /llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp | |
parent | 64e8fd540ecc38ee3daf942499091589785e2733 (diff) | |
download | llvm-0249df33fec16b728e2d33cae02f5da4c9f74e38.zip llvm-0249df33fec16b728e2d33cae02f5da4c9f74e38.tar.gz llvm-0249df33fec16b728e2d33cae02f5da4c9f74e38.tar.bz2 |
[AMDGPU] Do not generate mul with 1 in AMDGPU Atomic Optimizer
Check whether the operand of the mul is the constant value one for certain
atomic instructions, in order to avoid generating unnecessary instructions
when -amdgpu-atomic-optimizer is present.
Differential Revision: https://reviews.llvm.org/D88315
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp | 13 |
1 file changed, 9 insertions, 4 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
index c9d25d4..1215d9d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
@@ -404,6 +404,11 @@ static APInt getIdentityValueForAtomicOp(AtomicRMWInst::BinOp Op,
   }
 }
 
+static Value *buildMul(IRBuilder<> &B, Value *LHS, Value *RHS) {
+  const ConstantInt *CI = dyn_cast<ConstantInt>(LHS);
+  return (CI && CI->isOne()) ? RHS : B.CreateMul(LHS, RHS);
+}
+
 void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
                                            AtomicRMWInst::BinOp Op,
                                            unsigned ValIdx,
@@ -523,7 +528,7 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
       // old value times the number of active lanes.
       Value *const Ctpop = B.CreateIntCast(
           B.CreateUnaryIntrinsic(Intrinsic::ctpop, Ballot), Ty, false);
-      NewV = B.CreateMul(V, Ctpop);
+      NewV = buildMul(B, V, Ctpop);
       break;
     }
 
@@ -543,7 +548,7 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
       // old value times the parity of the number of active lanes.
       Value *const Ctpop = B.CreateIntCast(
           B.CreateUnaryIntrinsic(Intrinsic::ctpop, Ballot), Ty, false);
-      NewV = B.CreateMul(V, B.CreateAnd(Ctpop, 1));
+      NewV = buildMul(B, V, B.CreateAnd(Ctpop, 1));
       break;
     }
     }
@@ -622,7 +627,7 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
         llvm_unreachable("Unhandled atomic op");
       case AtomicRMWInst::Add:
       case AtomicRMWInst::Sub:
-        LaneOffset = B.CreateMul(V, Mbcnt);
+        LaneOffset = buildMul(B, V, Mbcnt);
         break;
       case AtomicRMWInst::And:
       case AtomicRMWInst::Or:
@@ -633,7 +638,7 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
         LaneOffset = B.CreateSelect(Cond, Identity, V);
         break;
       case AtomicRMWInst::Xor:
-        LaneOffset = B.CreateMul(V, B.CreateAnd(Mbcnt, 1));
+        LaneOffset = buildMul(B, V, B.CreateAnd(Mbcnt, 1));
         break;
       }
     }