diff options
author | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2021-03-01 16:13:13 -0800 |
---|---|---|
committer | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2021-03-01 19:48:50 -0800 |
commit | 7c724a896f93c97fe75db6f37b0995c9b35e0b82 (patch) | |
tree | cdc5b82af8020cd92bec20ad4312708cb84cb88f | |
parent | ea1a1ebbc673d810f1abf6cb58a40b5ec916ff07 (diff) | |
download | llvm-7c724a896f93c97fe75db6f37b0995c9b35e0b82.zip llvm-7c724a896f93c97fe75db6f37b0995c9b35e0b82.tar.gz llvm-7c724a896f93c97fe75db6f37b0995c9b35e0b82.tar.bz2 |
[AMDGPU] Do not check max-bb for a single block callee
The -amdgpu-inline-max-bb option could lead to suboptimal
codegen by preventing inlining of really simple functions,
including pure wrapper calls. Relax the cutoff by skipping
the check when the callee has a single basic block, on the grounds
that inlining it will not increase the total number of blocks
in the caller.
Differential Revision: https://reviews.llvm.org/D97744
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll | 44 |
2 files changed, 36 insertions, 12 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index eb2733f..6728b07 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -1149,7 +1149,9 @@ bool GCNTTIImpl::areInlineCompatible(const Function *Caller, // Hack to make compile times reasonable. if (InlineMaxBB && !Callee->hasFnAttribute(Attribute::InlineHint)) { - // Single BB does not increase total BB amount, thus subtract 1. + // Single BB does not increase total BB amount. + if (Callee->size() == 1) + return true; size_t BBSize = Caller->size() + Callee->size() - 1; return BBSize <= InlineMaxBB; } diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll index 9b0803b..7d02f39 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll @@ -1,7 +1,8 @@ -; RUN: opt -mtriple=amdgcn--amdhsa -data-layout=A5 -O3 -S -inline-threshold=1 < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-INL1 %s -; RUN: opt -mtriple=amdgcn--amdhsa -data-layout=A5 -O3 -S < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-INLDEF %s -; RUN: opt -mtriple=amdgcn--amdhsa -data-layout=A5 -passes='default<O3>' -S -inline-threshold=1 < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-INL1 %s -; RUN: opt -mtriple=amdgcn--amdhsa -data-layout=A5 -passes='default<O3>' -S < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-INLDEF %s +; RUN: opt -mtriple=amdgcn--amdhsa -data-layout=A5 -O3 -S -inline-threshold=1 < %s | FileCheck -check-prefixes=GCN,GCN-INL1,GCN-MAXBBDEF %s +; RUN: opt -mtriple=amdgcn--amdhsa -data-layout=A5 -O3 -S < %s | FileCheck -check-prefixes=GCN,GCN-INLDEF,GCN-MAXBBDEF %s +; RUN: opt -mtriple=amdgcn--amdhsa -data-layout=A5 -passes='default<O3>' -S -inline-threshold=1 < %s | FileCheck -check-prefixes=GCN,GCN-INL1,GCN-MAXBBDEF %s +; RUN: opt -mtriple=amdgcn--amdhsa -data-layout=A5 
-passes='default<O3>' -S < %s | FileCheck -check-prefixes=GCN,GCN-INLDEF,GCN-MAXBBDEF %s +; RUN: opt -mtriple=amdgcn--amdhsa -data-layout=A5 -passes='default<O3>' -S -amdgpu-inline-max-bb=1 < %s | FileCheck -check-prefixes=GCN,GCN-MAXBB1 %s define coldcc float @foo(float %x, float %y) { entry: @@ -57,12 +58,14 @@ entry: } ; GCN: define amdgpu_kernel void @test_inliner( -; GCN-INL1: %c1 = tail call coldcc float @foo( -; GCN-INLDEF: %cmp.i = fcmp ogt float %tmp2, 0.000000e+00 -; GCN: %div.i{{[0-9]*}} = fdiv float 1.000000e+00, %c -; GCN: %div.i{{[0-9]*}} = fdiv float 2.000000e+00, %tmp1.i -; GCN: call void @foo_noinline( -; GCN: tail call float @_Z3sinf( +; GCN-INL1: %c1 = tail call coldcc float @foo( +; GCN-INLDEF: %cmp.i = fcmp ogt float %tmp2, 0.000000e+00 +; GCN-MAXBBDEF: %div.i{{[0-9]*}} = fdiv float 1.000000e+00, %c +; GCN-MAXBBDEF: %div.i{{[0-9]*}} = fdiv float 2.000000e+00, %tmp1.i +; GCN-MAXBB1: call coldcc void @foo_private_ptr +; GCN-MAXBB1: call coldcc void @foo_private_ptr2 +; GCN: call void @foo_noinline( +; GCN: tail call float @_Z3sinf( define amdgpu_kernel void @test_inliner(float addrspace(1)* nocapture %a, i32 %n) { entry: %pvt_arr = alloca [64 x float], align 4, addrspace(5) @@ -95,7 +98,8 @@ entry: } ; GCN: define amdgpu_kernel void @test_inliner_multi_pvt_ptr( -; GCN: %div.i{{[0-9]*}} = fdiv float 2.000000e+00, %tmp1.i +; GCN-MAXBBDEF: %div.i{{[0-9]*}} = fdiv float 2.000000e+00, %tmp1.i +; GCN-MAXBB1: call coldcc void @foo_private_ptr2 define amdgpu_kernel void @test_inliner_multi_pvt_ptr(float addrspace(1)* nocapture %a, i32 %n, float %v) { entry: %pvt_arr1 = alloca [32 x float], align 4, addrspace(5) @@ -147,6 +151,24 @@ entry: ret void } +; GCN: define amdgpu_kernel void @test_inliner_maxbb_singlebb( +; GCN: tail call float @_Z3sinf +define amdgpu_kernel void @test_inliner_maxbb_singlebb(float addrspace(1)* nocapture %a, i32 %n) { +entry: + %cmp = icmp eq i32 %n, 1 + br i1 %cmp, label %bb.1, label %bb.2 + br label %bb.1 + +bb.1: + store float 
1.0, float* undef + br label %bb.2 + +bb.2: + %c = call float @sin_wrapper(float 1.0) + store float %c, float addrspace(1)* %a + ret void +} + declare i32 @llvm.amdgcn.workitem.id.x() #1 declare float @_Z3sinf(float) #1 |