diff options
author | Jon Roelofs <jonathan_roelofs@apple.com> | 2023-09-15 10:04:56 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-09-15 10:04:56 -0700 |
commit | 003bcad9a8b21e15e3786a52b1dafa844075ab84 (patch) | |
tree | 67d541bf7208375fcf859309fa1f4c327200a5d0 | |
parent | 06e1bcaa1c0b330767540a415c6df799d772f6f1 (diff) | |
download | llvm-003bcad9a8b21e15e3786a52b1dafa844075ab84.zip llvm-003bcad9a8b21e15e3786a52b1dafa844075ab84.tar.gz llvm-003bcad9a8b21e15e3786a52b1dafa844075ab84.tar.bz2 |
[ARM] Always lower direct calls as direct when the outliner is enabled (#66434)
The indirect lowering hinders the outliner's ability to see that
sequences are in fact common, since the sequence similarity is rendered
opaque by the register callee. The size savings from making them
indirect seem to be dwarfed by the outliner's savings from
de-duplication.
rdar://115178034
rdar://115459865
-rw-r--r-- | llvm/lib/CodeGen/TargetPassConfig.cpp | 3 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.cpp | 8 | ||||
-rw-r--r-- | llvm/test/CodeGen/ARM/minsize-call-cse.ll | 13 | ||||
-rw-r--r-- | llvm/test/CodeGen/ARM/pr42062.ll | 2 |
4 files changed, 21 insertions, 5 deletions
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index e6ecbc9..94a3264 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -1241,6 +1241,9 @@ void TargetPassConfig::addMachinePasses() { addPass(&LiveDebugValuesID); addPass(&MachineSanitizerBinaryMetadataID); + if (EnableMachineOutliner == RunOutliner::NeverOutline) + TM->Options.EnableMachineOutliner = false; + if (TM->Options.EnableMachineOutliner && getOptLevel() != CodeGenOptLevel::None && EnableMachineOutliner != RunOutliner::NeverOutline) { diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index d9ae95d..21489a6 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -2395,6 +2395,14 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB; }) > 2; + // The indirect call lowering hinders the MachineOutliner's ability to + // recognize common sequences. The resulting indirect calls all have the + // same target, but the outliner can't tell this a priori, since the + // branch target is turned into a register operand, and those can't (yet?) + // be assumed to have the same value at runtime. 
+ const TargetOptions &Options = DAG.getTarget().Options; + if (Options.EnableMachineOutliner) + PreferIndirect = false; } } if (isTailCall) { diff --git a/llvm/test/CodeGen/ARM/minsize-call-cse.ll b/llvm/test/CodeGen/ARM/minsize-call-cse.ll index 072b76f..3a72dd2 100644 --- a/llvm/test/CodeGen/ARM/minsize-call-cse.ll +++ b/llvm/test/CodeGen/ARM/minsize-call-cse.ll @@ -1,12 +1,17 @@ -; RUN: llc < %s | FileCheck %s +; RUN: llc < %s | FileCheck %s --check-prefixes=OUTLINER,CHECK +; RUN: llc -enable-machine-outliner=always < %s | FileCheck %s --check-prefixes=OUTLINER,CHECK +; RUN: llc -enable-machine-outliner=never < %s | FileCheck %s --check-prefixes=INDIRECT,CHECK target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv7m-arm-none-eabi" ; CHECK-LABEL: f: -; CHECK: blx r -; CHECK: blx r -; CHECK: blx r +; OUTLINER: bl g +; OUTLINER: bl g +; OUTLINER: bl g +; INDIRECT: blx r +; INDIRECT: blx r +; INDIRECT: blx r define void @f() minsize optsize { entry: call void @g(i32 45, i32 66) diff --git a/llvm/test/CodeGen/ARM/pr42062.ll b/llvm/test/CodeGen/ARM/pr42062.ll index 8396f93..ba7e436a 100644 --- a/llvm/test/CodeGen/ARM/pr42062.ll +++ b/llvm/test/CodeGen/ARM/pr42062.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -o - %s 2>&1 | FileCheck %s --implicit-check-not=error +; RUN: llc -o - %s -enable-machine-outliner=never 2>&1 | FileCheck %s --implicit-check-not=error target triple = "thumbv8m.base-arm-none-eabi" @foo = external global i8 declare i32 @bar(ptr nocapture, i32, i32, ptr nocapture) |