From 94faadaca4e1704f674d2e9d4a1d25643b9ca52c Mon Sep 17 00:00:00 2001 From: Snehasish Kumar Date: Wed, 5 Aug 2020 15:34:31 -0700 Subject: [llvm][CodeGen] Machine Function Splitter We introduce a codegen optimization pass which splits functions into hot and cold parts. This pass leverages the basic block sections feature recently introduced in LLVM from the Propeller project. The pass targets functions with profile coverage, identifies cold blocks and moves them to a separate section. The linker groups all cold blocks across functions together, decreasing fragmentation and improving icache and itlb utilization. We evaluated the Machine Function Splitter pass on clang bootstrap and SPECInt 2017. For clang bootstrap we observe a mean 2.33% runtime improvement with a ~32% reduction in itlb and stlb misses. Additionally, L1 icache misses reduced by 9.5% while L2 instruction misses reduced by 20%. For SPECInt we report the change in IntRate the C/C++ benchmarks. All benchmarks apart from mcf and x264 improve, on average by 0.6% with the max for deepsjeng at 1.6%. Benchmark % Change 500.perlbench_r 0.78 502.gcc_r 0.82 505.mcf_r -0.30 520.omnetpp_r 0.18 523.xalancbmk_r 0.37 525.x264_r -0.46 531.deepsjeng_r 1.61 541.leela_r 0.83 557.xz_r 0.15 Differential Revision: https://reviews.llvm.org/D85368 --- llvm/lib/CodeGen/CommandFlags.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'llvm/lib/CodeGen/CommandFlags.cpp') diff --git a/llvm/lib/CodeGen/CommandFlags.cpp b/llvm/lib/CodeGen/CommandFlags.cpp index a47608c..0a7c7a9 100644 --- a/llvm/lib/CodeGen/CommandFlags.cpp +++ b/llvm/lib/CodeGen/CommandFlags.cpp @@ -84,6 +84,7 @@ CGOPT(DebuggerKind, DebuggerTuningOpt) CGOPT(bool, EnableStackSizeSection) CGOPT(bool, EnableAddrsig) CGOPT(bool, EmitCallSiteInfo) +CGOPT(bool, EnableMachineFunctionSplitter) CGOPT(bool, EnableDebugEntryValues) CGOPT(bool, ValueTrackingVariableLocations) CGOPT(bool, ForceDwarfFrameSection) @@ -407,6 +408,13 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() { cl::init(false)); CGBINDOPT(ValueTrackingVariableLocations); + static cl::opt EnableMachineFunctionSplitter( + "split-machine-functions", + cl::desc("Split out cold basic blocks from machine functions based on " + "profile information"), + cl::init(false)); + CGBINDOPT(EnableMachineFunctionSplitter); + static cl::opt ForceDwarfFrameSection( "force-dwarf-frame-section", cl::desc("Always emit a debug frame section."), cl::init(false)); @@ -479,6 +487,7 @@ TargetOptions codegen::InitTargetOptionsFromCodeGenFlags() { Options.ExplicitEmulatedTLS = EmulatedTLSView->getNumOccurrences() > 0; Options.ExceptionModel = getExceptionModel(); Options.EmitStackSizeSection = getEnableStackSizeSection(); + Options.EnableMachineFunctionSplitter = getEnableMachineFunctionSplitter(); Options.EmitAddrsig = getEnableAddrsig(); Options.EmitCallSiteInfo = getEmitCallSiteInfo(); Options.EnableDebugEntryValues = getEnableDebugEntryValues(); -- cgit v1.1