diff options
| author | Neil Henning <neil.henning@amd.com> | 2019-01-18 16:39:27 +0000 |
|---|---|---|
| committer | Neil Henning <neil.henning@amd.com> | 2019-01-18 16:39:27 +0000 |
| commit | 3ed09f8e0cb4fdeefcbdd4ed8739fd021377774f (patch) | |
| tree | b9fea4e7b84ff11d56f892e5eebd613cb39ea8ac | |
| parent | 7503316d106edbd7dcfd0604a22cc656c17da524 (diff) | |
| download | llvm-3ed09f8e0cb4fdeefcbdd4ed8739fd021377774f.zip llvm-3ed09f8e0cb4fdeefcbdd4ed8739fd021377774f.tar.gz llvm-3ed09f8e0cb4fdeefcbdd4ed8739fd021377774f.tar.bz2 | |
[AMDGPU] Add some missing always-uniform values.
This commit adds some missing intrinsics into the isAlwaysUniform list
for the AMDGPU backend.
Differential Revision: https://reviews.llvm.org/D56845
llvm-svn: 351562
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | 2 | ||||
| -rw-r--r-- | llvm/test/Analysis/DivergenceAnalysis/AMDGPU/always_uniform.ll | 20 |
2 files changed, 21 insertions, 1 deletion
```diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 62e7e44..674230c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -579,6 +579,8 @@ bool GCNTTIImpl::isAlwaysUniform(const Value *V) const {
       return false;
     case Intrinsic::amdgcn_readfirstlane:
     case Intrinsic::amdgcn_readlane:
+    case Intrinsic::amdgcn_icmp:
+    case Intrinsic::amdgcn_fcmp:
       return true;
     }
   }
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/always_uniform.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/always_uniform.ll
index eaac9ce..b5e8e49 100644
--- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/always_uniform.ll
+++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/always_uniform.ll
@@ -1,6 +1,7 @@
 ; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
 
-define amdgpu_kernel void @workitem_id_x() #1 {
+; CHECK: for function 'readfirstlane':
+define amdgpu_kernel void @readfirstlane() {
   %id.x = call i32 @llvm.amdgcn.workitem.id.x()
 ; CHECK: DIVERGENT: %id.x = call i32 @llvm.amdgcn.workitem.id.x()
   %first.lane = call i32 @llvm.amdgcn.readfirstlane(i32 %id.x)
@@ -8,7 +9,24 @@ define amdgpu_kernel void @workitem_id_x() #1 {
   ret void
 }
 
+; CHECK: for function 'icmp':
+define amdgpu_kernel void @icmp(i32 inreg %x) {
+; CHECK-NOT: DIVERGENT: %icmp = call i64 @llvm.amdgcn.icmp.i32
+  %icmp = call i64 @llvm.amdgcn.icmp.i32(i32 %x, i32 0, i32 33)
+  ret void
+}
+
+; CHECK: for function 'fcmp':
+define amdgpu_kernel void @fcmp(float inreg %x, float inreg %y) {
+; CHECK-NOT: DIVERGENT: %fcmp = call i64 @llvm.amdgcn.fcmp.i32
+  %fcmp = call i64 @llvm.amdgcn.fcmp.i32(float %x, float %y, i32 33)
+  ret void
+}
+
 declare i32 @llvm.amdgcn.workitem.id.x() #0
 declare i32 @llvm.amdgcn.readfirstlane(i32) #0
+declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) #1
+declare i64 @llvm.amdgcn.fcmp.i32(float, float, i32) #1
 
 attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind readnone convergent }
```
