diff options
author | Rahman Lavaee <rahmanl@google.com> | 2025-07-09 10:48:07 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-07-09 10:48:07 -0700 |
commit | cd9236d78833a3f312d0a38e53e3f12e9926bcf3 (patch) | |
tree | 934d91b9164e39f823a8f111c76a512e9fa490f7 | |
parent | a63846b475bacfda49eb00016e0dc43c9ab1aa7d (diff) | |
download | llvm-cd9236d78833a3f312d0a38e53e3f12e9926bcf3.zip llvm-cd9236d78833a3f312d0a38e53e3f12e9926bcf3.tar.gz llvm-cd9236d78833a3f312d0a38e53e3f12e9926bcf3.tar.bz2 |
Account for inline assembly instructions in inlining cost. (#146628)
The inliner currently treats every "call asm" IR instruction as a single
instruction, regardless of how many instructions the inline assembly may
contain. This may underestimate the cost of inlining a callee that
contains long inline assembly. In addition, we may need to assign a higher
cost to instructions in inline assembly, since they cannot be analyzed
or optimized by the compiler.
This PR introduces a new option, `-inline-asm-instr-cost` (set to zero by
default, which disables this accounting), that controls the cost of each
inline assembly instruction in the inliner's cost-benefit analysis.
-rw-r--r-- | llvm/lib/Analysis/InlineCost.cpp | 55 | ||||
-rw-r--r-- | llvm/test/Transforms/Inline/inline-call-with-asm-call.ll | 35 |
2 files changed, 90 insertions, 0 deletions
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp index 773a604..22f4d08 100644 --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -37,6 +37,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/InlineAsm.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Operator.h" @@ -141,6 +142,10 @@ static cl::opt<int> InstrCost("inline-instr-cost", cl::Hidden, cl::init(5), cl::desc("Cost of a single instruction when inlining")); +static cl::opt<int> InlineAsmInstrCost( + "inline-asm-instr-cost", cl::Hidden, cl::init(0), + cl::desc("Cost of a single inline asm instruction when inlining")); + static cl::opt<int> MemAccessCost("inline-memaccess-cost", cl::Hidden, cl::init(0), cl::desc("Cost of load/store instruction when inlining")); @@ -351,6 +356,9 @@ protected: /// for. virtual void onMissedSimplification() {} + /// Account for inline assembly instructions. + virtual void onInlineAsm(const InlineAsm &Arg) {} + /// Start accounting potential benefits due to SROA for the given alloca. virtual void onInitializeSROAArg(AllocaInst *Arg) {} @@ -382,6 +390,7 @@ protected: /// Number of bytes allocated statically by the callee. uint64_t AllocatedSize = 0; unsigned NumInstructions = 0; + unsigned NumInlineAsmInstructions = 0; unsigned NumVectorInstructions = 0; /// While we walk the potentially-inlined instructions, we build up and @@ -777,6 +786,48 @@ class InlineCostCallAnalyzer final : public CallAnalyzer { addCost(SwitchCost); } + + // Parses the inline assembly argument to account for its cost. Inline + // assembly instructions incur higher costs for inlining since they cannot be + // analyzed and optimized. 
+ void onInlineAsm(const InlineAsm &Arg) override { + if (!InlineAsmInstrCost) + return; + SmallVector<StringRef, 4> AsmStrs; + Arg.collectAsmStrs(AsmStrs); + int SectionLevel = 0; + int InlineAsmInstrCount = 0; + for (StringRef AsmStr : AsmStrs) { + // Trim whitespaces and comments. + StringRef Trimmed = AsmStr.trim(); + size_t hashPos = Trimmed.find('#'); + if (hashPos != StringRef::npos) + Trimmed = Trimmed.substr(0, hashPos); + // Ignore comments. + if (Trimmed.empty()) + continue; + // Filter out the outlined assembly instructions from the cost by keeping + // track of the section level and only accounting for instrutions at + // section level of zero. Note there will be duplication in outlined + // sections too, but is not accounted in the inlining cost model. + if (Trimmed.starts_with(".pushsection")) { + ++SectionLevel; + continue; + } + if (Trimmed.starts_with(".popsection")) { + --SectionLevel; + continue; + } + // Ignore directives and labels. + if (Trimmed.starts_with(".") || Trimmed.contains(":")) + continue; + if (SectionLevel == 0) + ++InlineAsmInstrCount; + } + NumInlineAsmInstructions += InlineAsmInstrCount; + addCost(InlineAsmInstrCount * InlineAsmInstrCost); + } + void onMissedSimplification() override { addCost(InstrCost); } void onInitializeSROAArg(AllocaInst *Arg) override { @@ -2420,6 +2471,9 @@ bool CallAnalyzer::visitCallBase(CallBase &Call) { if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate()) ContainsNoDuplicateCall = true; + if (InlineAsm *InlineAsmOp = dyn_cast<InlineAsm>(Call.getCalledOperand())) + onInlineAsm(*InlineAsmOp); + Function *F = Call.getCalledFunction(); bool IsIndirectCall = !F; if (IsIndirectCall) { @@ -3005,6 +3059,7 @@ void InlineCostCallAnalyzer::print(raw_ostream &OS) { DEBUG_PRINT_STAT(NumConstantPtrDiffs); DEBUG_PRINT_STAT(NumInstructionsSimplified); DEBUG_PRINT_STAT(NumInstructions); + DEBUG_PRINT_STAT(NumInlineAsmInstructions); DEBUG_PRINT_STAT(SROACostSavings); 
DEBUG_PRINT_STAT(SROACostSavingsLost); DEBUG_PRINT_STAT(LoadEliminationCost); diff --git a/llvm/test/Transforms/Inline/inline-call-with-asm-call.ll b/llvm/test/Transforms/Inline/inline-call-with-asm-call.ll new file mode 100644 index 0000000..7d8121d --- /dev/null +++ b/llvm/test/Transforms/Inline/inline-call-with-asm-call.ll @@ -0,0 +1,35 @@ +;; Test to verify that when callee has inline assembly, bumping up `-inline-asm-instr-cost` would block inlining. + +; RUN: opt < %s -passes=inline -S | FileCheck %s --check-prefixes=CHECK,INLINE +; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s --check-prefixes=CHECK,INLINE + +;; Verify that a low assembly instruction cost of 150 does not block inlining. +;; This test also verifies that the outlined section's instructions (in "other" +;; section) do not contribute to the cost. +; RUN: opt < %s -passes=inline -inline-asm-instr-cost=150 -S | FileCheck %s --check-prefixes=CHECK,INLINE +; RUN: opt < %s -passes='cgscc(inline)' -inline-asm-instr-cost=150 -S | FileCheck %s --check-prefixes=CHECK,INLINE + +;; Verify that an assembly instruction cost of 300 blocks inlining. +; RUN: opt < %s -passes=inline -inline-asm-instr-cost=300 -S | FileCheck %s --check-prefixes=CHECK,NOINLINE +; RUN: opt < %s -passes='cgscc(inline)' -inline-asm-instr-cost=300 -S | FileCheck %s --check-prefixes=CHECK,NOINLINE + +define void @caller(i32 %a, i1 %b) #0 { + call void @callee(i32 %a, i1 %b) + ret void +} + +; CHECK: define void @caller +; INLINE: call void asm +; NOINLINE: call void @callee + + +;; callee function with asm call with two real assembly instructions in the +;; destination section and two assembly instructions in the outlined "other" +;; section. +define void @callee(i32 %a, i1 %b) { + call void asm sideeffect "s_nop 1\0A\09.pushsection other\0A\09s_nop 2\0A\09s_nop 3\0A\09.popsection\0A\09s_nop 4\0A\09.align 32", ""() + ret void +} +; CHECK: define void @callee + + |