aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
diff options
context:
space:
mode:
authorMirko Brkusanin <Mirko.Brkusanin@amd.com>2020-09-29 16:35:42 +0200
committerMirko Brkusanin <Mirko.Brkusanin@amd.com>2020-09-30 11:09:18 +0200
commit0249df33fec16b728e2d33cae02f5da4c9f74e38 (patch)
treec56a5a38c95fc6b9e4986d81a18486e0e8a9ea8c /llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
parent64e8fd540ecc38ee3daf942499091589785e2733 (diff)
downloadllvm-0249df33fec16b728e2d33cae02f5da4c9f74e38.zip
llvm-0249df33fec16b728e2d33cae02f5da4c9f74e38.tar.gz
llvm-0249df33fec16b728e2d33cae02f5da4c9f74e38.tar.bz2
[AMDGPU] Do not generate mul with 1 in AMDGPU Atomic Optimizer
Check whether the operand of a mul is the constant value one for certain atomic instructions, in order to avoid generating unnecessary instructions when -amdgpu-atomic-optimizer is enabled. Differential Revision: https://reviews.llvm.org/D88315
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp13
1 files changed, 9 insertions, 4 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
index c9d25d4..1215d9d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
@@ -404,6 +404,11 @@ static APInt getIdentityValueForAtomicOp(AtomicRMWInst::BinOp Op,
}
}
+static Value *buildMul(IRBuilder<> &B, Value *LHS, Value *RHS) { // Builds LHS * RHS with B, but emits no mul when LHS is the constant 1.
+ const ConstantInt *CI = dyn_cast<ConstantInt>(LHS); // Null unless LHS is a ConstantInt; only LHS is inspected, RHS is never checked.
+ return (CI && CI->isOne()) ? RHS : B.CreateMul(LHS, RHS); // 1 * RHS == RHS, so hand back RHS itself instead of creating an instruction.
+}
+
void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
AtomicRMWInst::BinOp Op,
unsigned ValIdx,
@@ -523,7 +528,7 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
// old value times the number of active lanes.
Value *const Ctpop = B.CreateIntCast(
B.CreateUnaryIntrinsic(Intrinsic::ctpop, Ballot), Ty, false);
- NewV = B.CreateMul(V, Ctpop);
+ NewV = buildMul(B, V, Ctpop);
break;
}
@@ -543,7 +548,7 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
// old value times the parity of the number of active lanes.
Value *const Ctpop = B.CreateIntCast(
B.CreateUnaryIntrinsic(Intrinsic::ctpop, Ballot), Ty, false);
- NewV = B.CreateMul(V, B.CreateAnd(Ctpop, 1));
+ NewV = buildMul(B, V, B.CreateAnd(Ctpop, 1));
break;
}
}
@@ -622,7 +627,7 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
llvm_unreachable("Unhandled atomic op");
case AtomicRMWInst::Add:
case AtomicRMWInst::Sub:
- LaneOffset = B.CreateMul(V, Mbcnt);
+ LaneOffset = buildMul(B, V, Mbcnt);
break;
case AtomicRMWInst::And:
case AtomicRMWInst::Or:
@@ -633,7 +638,7 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
LaneOffset = B.CreateSelect(Cond, Identity, V);
break;
case AtomicRMWInst::Xor:
- LaneOffset = B.CreateMul(V, B.CreateAnd(Mbcnt, 1));
+ LaneOffset = buildMul(B, V, B.CreateAnd(Mbcnt, 1));
break;
}
}