aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRahman Lavaee <rahmanl@google.com>2025-07-09 10:48:07 -0700
committerGitHub <noreply@github.com>2025-07-09 10:48:07 -0700
commitcd9236d78833a3f312d0a38e53e3f12e9926bcf3 (patch)
tree934d91b9164e39f823a8f111c76a512e9fa490f7
parenta63846b475bacfda49eb00016e0dc43c9ab1aa7d (diff)
downloadllvm-cd9236d78833a3f312d0a38e53e3f12e9926bcf3.zip
llvm-cd9236d78833a3f312d0a38e53e3f12e9926bcf3.tar.gz
llvm-cd9236d78833a3f312d0a38e53e3f12e9926bcf3.tar.bz2
Account for inline assembly instructions in inlining cost. (#146628)
The inliner currently treats every "call asm" IR instruction as a single instruction, regardless of how many instructions the inline assembly contains. This may underestimate the cost of inlining a callee that contains long inline assembly. In addition, instructions in inline assembly may warrant a higher cost, since the compiler cannot analyze or optimize them. This PR introduces a new option, `-inline-asm-instr-cost` (zero by default), which controls the cost of inline assembly instructions in the inliner's cost-benefit analysis.
-rw-r--r--llvm/lib/Analysis/InlineCost.cpp55
-rw-r--r--llvm/test/Transforms/Inline/inline-call-with-asm-call.ll35
2 files changed, 90 insertions, 0 deletions
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 773a604..22f4d08 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -37,6 +37,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
@@ -141,6 +142,10 @@ static cl::opt<int>
InstrCost("inline-instr-cost", cl::Hidden, cl::init(5),
cl::desc("Cost of a single instruction when inlining"));
+static cl::opt<int> InlineAsmInstrCost(
+ "inline-asm-instr-cost", cl::Hidden, cl::init(0),
+ cl::desc("Cost of a single inline asm instruction when inlining")); // With the initial value 0, InlineCostCallAnalyzer::onInlineAsm returns without charging anything.
+
static cl::opt<int>
MemAccessCost("inline-memaccess-cost", cl::Hidden, cl::init(0),
cl::desc("Cost of load/store instruction when inlining"));
@@ -351,6 +356,9 @@ protected:
/// for.
virtual void onMissedSimplification() {}
+ /// Hook invoked by visitCallBase when the called operand is inline assembly; does nothing by default.
+ virtual void onInlineAsm(const InlineAsm &Arg) {}
+
/// Start accounting potential benefits due to SROA for the given alloca.
virtual void onInitializeSROAArg(AllocaInst *Arg) {}
@@ -382,6 +390,7 @@ protected:
/// Number of bytes allocated statically by the callee.
uint64_t AllocatedSize = 0;
unsigned NumInstructions = 0;
+ unsigned NumInlineAsmInstructions = 0;
unsigned NumVectorInstructions = 0;
/// While we walk the potentially-inlined instructions, we build up and
@@ -777,6 +786,48 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
addCost(SwitchCost);
}
+
+ // Estimates the inlining cost of an inline assembly blob by counting its
+ // instruction-like lines and charging InlineAsmInstrCost for each. Inline asm
+ // is charged separately because the compiler cannot analyze or optimize it.
+ void onInlineAsm(const InlineAsm &Arg) override {
+ if (!InlineAsmInstrCost) // A zero per-instruction cost adds nothing, so skip the parse.
+ return;
+ SmallVector<StringRef, 4> AsmStrs;
+ Arg.collectAsmStrs(AsmStrs); // Presumably one entry per asm statement line -- confirm against InlineAsm::collectAsmStrs.
+ int SectionLevel = 0; // Current .pushsection/.popsection nesting depth.
+ int InlineAsmInstrCount = 0; // Instruction-like lines seen at depth zero.
+ for (StringRef AsmStr : AsmStrs) {
+ // Trim surrounding whitespace, then drop everything from the first '#' on (treated as a comment).
+ StringRef Trimmed = AsmStr.trim();
+ size_t hashPos = Trimmed.find('#');
+ if (hashPos != StringRef::npos)
+ Trimmed = Trimmed.substr(0, hashPos);
+ // Skip entries that are blank or held nothing but a comment.
+ if (Trimmed.empty())
+ continue;
+ // Do not charge for instructions emitted into another section: track the
+ // .pushsection/.popsection nesting depth and count only instructions seen
+ // at depth zero. Instructions in pushed sections are duplicated by inlining
+ // as well, but the inlining cost model does not account for them.
+ if (Trimmed.starts_with(".pushsection")) {
+ ++SectionLevel;
+ continue;
+ }
+ if (Trimmed.starts_with(".popsection")) {
+ --SectionLevel; // NOTE(review): an unmatched .popsection makes this negative; nothing is counted until a later .pushsection rebalances it.
+ continue;
+ }
+ // Ignore other directives (leading '.') and any entry containing ':' (treated as a label).
+ if (Trimmed.starts_with(".") || Trimmed.contains(":")) // NOTE(review): also skips an instruction that shares its line with a label or has ':' in an operand.
+ continue;
+ if (SectionLevel == 0)
+ ++InlineAsmInstrCount;
+ }
+ NumInlineAsmInstructions += InlineAsmInstrCount; // Statistic printed by InlineCostCallAnalyzer::print.
+ addCost(InlineAsmInstrCount * InlineAsmInstrCost);
+ }
+
void onMissedSimplification() override { addCost(InstrCost); }
void onInitializeSROAArg(AllocaInst *Arg) override {
@@ -2420,6 +2471,9 @@ bool CallAnalyzer::visitCallBase(CallBase &Call) {
if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate())
ContainsNoDuplicateCall = true;
+ if (InlineAsm *InlineAsmOp = dyn_cast<InlineAsm>(Call.getCalledOperand()))
+ onInlineAsm(*InlineAsmOp);
+
Function *F = Call.getCalledFunction();
bool IsIndirectCall = !F;
if (IsIndirectCall) {
@@ -3005,6 +3059,7 @@ void InlineCostCallAnalyzer::print(raw_ostream &OS) {
DEBUG_PRINT_STAT(NumConstantPtrDiffs);
DEBUG_PRINT_STAT(NumInstructionsSimplified);
DEBUG_PRINT_STAT(NumInstructions);
+ DEBUG_PRINT_STAT(NumInlineAsmInstructions);
DEBUG_PRINT_STAT(SROACostSavings);
DEBUG_PRINT_STAT(SROACostSavingsLost);
DEBUG_PRINT_STAT(LoadEliminationCost);
diff --git a/llvm/test/Transforms/Inline/inline-call-with-asm-call.ll b/llvm/test/Transforms/Inline/inline-call-with-asm-call.ll
new file mode 100644
index 0000000..7d8121d
--- /dev/null
+++ b/llvm/test/Transforms/Inline/inline-call-with-asm-call.ll
@@ -0,0 +1,35 @@
+;; Test to verify that when callee has inline assembly, bumping up `-inline-asm-instr-cost` would block inlining.
+
+; RUN: opt < %s -passes=inline -S | FileCheck %s --check-prefixes=CHECK,INLINE
+; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s --check-prefixes=CHECK,INLINE
+
+;; Verify that a low assembly instruction cost of 150 does not block inlining.
+;; This test also verifies that the outlined section's instructions (in "other"
+;; section) do not contribute to the cost.
+; RUN: opt < %s -passes=inline -inline-asm-instr-cost=150 -S | FileCheck %s --check-prefixes=CHECK,INLINE
+; RUN: opt < %s -passes='cgscc(inline)' -inline-asm-instr-cost=150 -S | FileCheck %s --check-prefixes=CHECK,INLINE
+
+;; Verify that an assembly instruction cost of 300 blocks inlining.
+; RUN: opt < %s -passes=inline -inline-asm-instr-cost=300 -S | FileCheck %s --check-prefixes=CHECK,NOINLINE
+; RUN: opt < %s -passes='cgscc(inline)' -inline-asm-instr-cost=300 -S | FileCheck %s --check-prefixes=CHECK,NOINLINE
+
+define void @caller(i32 %a, i1 %b) #0 {
+ call void @callee(i32 %a, i1 %b) ; call site under test: inlined or kept depending on -inline-asm-instr-cost
+ ret void
+}
+
+; CHECK: define void @caller
+; INLINE: call void asm
+; NOINLINE: call void @callee
+
+
+;; Callee whose inline asm has two instructions in the current section
+;; (s_nop 1, s_nop 4) and two in the pushed "other" section (s_nop 2, s_nop 3).
+;; The trailing .align is a directive, not an instruction.
+define void @callee(i32 %a, i1 %b) {
+ call void asm sideeffect "s_nop 1\0A\09.pushsection other\0A\09s_nop 2\0A\09s_nop 3\0A\09.popsection\0A\09s_nop 4\0A\09.align 32", ""()
+ ret void
+}
+; CHECK: define void @callee
+
+