aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAmir Ayupov <aaupov@fb.com>2024-07-02 09:18:59 -0700
committerGitHub <noreply@github.com>2024-07-02 09:18:59 -0700
commit01ff0ba336b367d18d6e128890f5db53b9a8f50f (patch)
treeaa3f44aac0a3b3be98488ef2187da9087027629d
parentce2208331d0530db525928a2d427c48f4c5fb8bb (diff)
downloadllvm-users/aaupov/spr/main.bolt-drop-macro-fusion-alignment.zip
llvm-users/aaupov/spr/main.bolt-drop-macro-fusion-alignment.tar.gz
llvm-users/aaupov/spr/main.bolt-drop-macro-fusion-alignment.tar.bz2
[BOLT] Drop macro-fusion alignment (#97358)users/aaupov/spr/main.bolt-drop-macro-fusion-alignment
9d0754ada5dbbc0c009bcc2f7824488419cc5530 dropped MC support required for optimal macro-fusion alignment in BOLT. Remove the support in BOLT as performance measurements with large binaries didn't show a significant improvement. Test Plan: macro-fusion alignment was never upstreamed, so no upstream tests are affected.
-rw-r--r--bolt/include/bolt/Core/BinaryBasicBlock.h9
-rw-r--r--bolt/include/bolt/Core/BinaryContext.h4
-rw-r--r--bolt/include/bolt/Core/BinaryFunction.h4
-rw-r--r--bolt/include/bolt/Core/MCPlusBuilder.h7
-rw-r--r--bolt/lib/Core/BinaryBasicBlock.cpp39
-rw-r--r--bolt/lib/Core/BinaryEmitter.cpp37
-rw-r--r--bolt/lib/Core/BinaryFunction.cpp25
-rw-r--r--bolt/lib/Passes/BinaryPasses.cpp20
-rw-r--r--bolt/lib/Rewrite/RewriteInstance.cpp22
-rw-r--r--bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp4
-rw-r--r--bolt/lib/Target/X86/X86MCPlusBuilder.cpp34
11 files changed, 0 insertions, 205 deletions
diff --git a/bolt/include/bolt/Core/BinaryBasicBlock.h b/bolt/include/bolt/Core/BinaryBasicBlock.h
index a57b707..9a9d7b8 100644
--- a/bolt/include/bolt/Core/BinaryBasicBlock.h
+++ b/bolt/include/bolt/Core/BinaryBasicBlock.h
@@ -842,15 +842,6 @@ public:
bool analyzeBranch(const MCSymbol *&TBB, const MCSymbol *&FBB,
MCInst *&CondBranch, MCInst *&UncondBranch);
- /// Return true if iterator \p I is pointing to the first instruction in
- /// a pair that could be macro-fused.
- bool isMacroOpFusionPair(const_iterator I) const;
-
- /// If the basic block has a pair of instructions suitable for macro-fusion,
- /// return iterator to the first instruction of the pair.
- /// Otherwise return end().
- const_iterator getMacroOpFusionPair() const;
-
/// Printer required for printing dominator trees.
void printAsOperand(raw_ostream &OS, bool PrintType = true) {
if (PrintType)
diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index 4ec3de3..73932c4 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -698,10 +698,6 @@ public:
/// Binary-wide aggregated stats.
struct BinaryStats {
- /// Stats for macro-fusion.
- uint64_t MissedMacroFusionPairs{0};
- uint64_t MissedMacroFusionExecCount{0};
-
/// Stats for stale profile matching:
/// the total number of basic blocks in the profile
uint32_t NumStaleBlocks{0};
diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index 3c64158..d318dfb 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -835,10 +835,6 @@ public:
/// them.
void calculateLoopInfo();
- /// Calculate missed macro-fusion opportunities and update BinaryContext
- /// stats.
- void calculateMacroOpFusionStats();
-
/// Returns if BinaryDominatorTree has been constructed for this function.
bool hasDomTree() const { return BDT != nullptr; }
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index a5fb390..584848a 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -917,13 +917,6 @@ public:
/// Return true if the instruction is encoded using EVEX (AVX-512).
virtual bool hasEVEXEncoding(const MCInst &Inst) const { return false; }
- /// Return true if a pair of instructions represented by \p Insts
- /// could be fused into a single uop.
- virtual bool isMacroOpFusionPair(ArrayRef<MCInst> Insts) const {
- llvm_unreachable("not implemented");
- return false;
- }
-
struct X86MemOperand {
unsigned BaseRegNum;
int64_t ScaleImm;
diff --git a/bolt/lib/Core/BinaryBasicBlock.cpp b/bolt/lib/Core/BinaryBasicBlock.cpp
index a4b9a7f..2a2192b 100644
--- a/bolt/lib/Core/BinaryBasicBlock.cpp
+++ b/bolt/lib/Core/BinaryBasicBlock.cpp
@@ -404,45 +404,6 @@ bool BinaryBasicBlock::analyzeBranch(const MCSymbol *&TBB, const MCSymbol *&FBB,
CondBranch, UncondBranch);
}
-bool BinaryBasicBlock::isMacroOpFusionPair(const_iterator I) const {
- auto &MIB = Function->getBinaryContext().MIB;
- ArrayRef<MCInst> Insts = Instructions;
- return MIB->isMacroOpFusionPair(Insts.slice(I - begin()));
-}
-
-BinaryBasicBlock::const_iterator
-BinaryBasicBlock::getMacroOpFusionPair() const {
- if (!Function->getBinaryContext().isX86())
- return end();
-
- if (getNumNonPseudos() < 2 || succ_size() != 2)
- return end();
-
- auto RI = getLastNonPseudo();
- assert(RI != rend() && "cannot have an empty block with 2 successors");
-
- BinaryContext &BC = Function->getBinaryContext();
-
- // Skip instruction if it's an unconditional branch following
- // a conditional one.
- if (BC.MIB->isUnconditionalBranch(*RI))
- ++RI;
-
- if (!BC.MIB->isConditionalBranch(*RI))
- return end();
-
- // Start checking with instruction preceding the conditional branch.
- ++RI;
- if (RI == rend())
- return end();
-
- auto II = std::prev(RI.base()); // convert to a forward iterator
- if (isMacroOpFusionPair(II))
- return II;
-
- return end();
-}
-
MCInst *BinaryBasicBlock::getTerminatorBefore(MCInst *Pos) {
BinaryContext &BC = Function->getBinaryContext();
auto Itr = rbegin();
diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp
index 5793963..f6dfa24 100644
--- a/bolt/lib/Core/BinaryEmitter.cpp
+++ b/bolt/lib/Core/BinaryEmitter.cpp
@@ -38,19 +38,6 @@ extern cl::opt<bool> PreserveBlocksAlignment;
cl::opt<bool> AlignBlocks("align-blocks", cl::desc("align basic blocks"),
cl::cat(BoltOptCategory));
-cl::opt<MacroFusionType>
-AlignMacroOpFusion("align-macro-fusion",
- cl::desc("fix instruction alignment for macro-fusion (x86 relocation mode)"),
- cl::init(MFT_HOT),
- cl::values(clEnumValN(MFT_NONE, "none",
- "do not insert alignment no-ops for macro-fusion"),
- clEnumValN(MFT_HOT, "hot",
- "only insert alignment no-ops on hot execution paths (default)"),
- clEnumValN(MFT_ALL, "all",
- "always align instructions to allow macro-fusion")),
- cl::ZeroOrMore,
- cl::cat(BoltRelocCategory));
-
static cl::list<std::string>
BreakFunctionNames("break-funcs",
cl::CommaSeparated,
@@ -453,20 +440,7 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF,
Streamer.emitLabel(EntrySymbol);
}
- // Check if special alignment for macro-fusion is needed.
- bool MayNeedMacroFusionAlignment =
- (opts::AlignMacroOpFusion == MFT_ALL) ||
- (opts::AlignMacroOpFusion == MFT_HOT && BB->getKnownExecutionCount());
- BinaryBasicBlock::const_iterator MacroFusionPair;
- if (MayNeedMacroFusionAlignment) {
- MacroFusionPair = BB->getMacroOpFusionPair();
- if (MacroFusionPair == BB->end())
- MayNeedMacroFusionAlignment = false;
- }
-
SMLoc LastLocSeen;
- // Remember if the last instruction emitted was a prefix.
- bool LastIsPrefix = false;
for (auto I = BB->begin(), E = BB->end(); I != E; ++I) {
MCInst &Instr = *I;
@@ -479,16 +453,6 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF,
continue;
}
- // Handle macro-fusion alignment. If we emitted a prefix as
- // the last instruction, we should've already emitted the associated
- // alignment hint, so don't emit it twice.
- if (MayNeedMacroFusionAlignment && !LastIsPrefix &&
- I == MacroFusionPair) {
- // This assumes the second instruction in the macro-op pair will get
- // assigned to its own MCRelaxableFragment. Since all JCC instructions
- // are relaxable, we should be safe.
- }
-
if (!EmitCodeOnly) {
// A symbol to be emitted before the instruction to mark its location.
MCSymbol *InstrLabel = BC.MIB->getInstLabel(Instr);
@@ -525,7 +489,6 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF,
}
Streamer.emitInstruction(Instr, *BC.STI);
- LastIsPrefix = BC.MIB->isPrefix(Instr);
}
}
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index d13e289..a316b8e 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -2276,8 +2276,6 @@ void BinaryFunction::postProcessCFG() {
postProcessBranches();
}
- calculateMacroOpFusionStats();
-
// The final cleanup of intermediate structures.
clearList(IgnoredBranches);
@@ -2294,29 +2292,6 @@ void BinaryFunction::postProcessCFG() {
"invalid CFG detected after post-processing");
}
-void BinaryFunction::calculateMacroOpFusionStats() {
- if (!getBinaryContext().isX86())
- return;
- for (const BinaryBasicBlock &BB : blocks()) {
- auto II = BB.getMacroOpFusionPair();
- if (II == BB.end())
- continue;
-
- // Check offset of the second instruction.
- // FIXME: arch-specific.
- const uint32_t Offset = BC.MIB->getOffsetWithDefault(*std::next(II), 0);
- if (!Offset || (getAddress() + Offset) % 64)
- continue;
-
- LLVM_DEBUG(dbgs() << "\nmissed macro-op fusion at address 0x"
- << Twine::utohexstr(getAddress() + Offset)
- << " in function " << *this << "; executed "
- << BB.getKnownExecutionCount() << " times.\n");
- ++BC.Stats.MissedMacroFusionPairs;
- BC.Stats.MissedMacroFusionExecCount += BB.getKnownExecutionCount();
- }
-}
-
void BinaryFunction::removeTagsFromProfile() {
for (BinaryBasicBlock *BB : BasicBlocks) {
if (BB->ExecutionCount == BinaryBasicBlock::COUNT_NO_PROFILE)
diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp
index 2810f72..64aa312 100644
--- a/bolt/lib/Passes/BinaryPasses.cpp
+++ b/bolt/lib/Passes/BinaryPasses.cpp
@@ -44,7 +44,6 @@ namespace opts {
extern cl::OptionCategory BoltCategory;
extern cl::OptionCategory BoltOptCategory;
-extern cl::opt<bolt::MacroFusionType> AlignMacroOpFusion;
extern cl::opt<unsigned> Verbosity;
extern cl::opt<bool> EnableBAT;
extern cl::opt<unsigned> ExecutionCountThreshold;
@@ -1635,25 +1634,6 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
}
}
- // Print information on missed macro-fusion opportunities seen on input.
- if (BC.Stats.MissedMacroFusionPairs) {
- BC.outs() << format(
- "BOLT-INFO: the input contains %zu (dynamic count : %zu)"
- " opportunities for macro-fusion optimization",
- BC.Stats.MissedMacroFusionPairs, BC.Stats.MissedMacroFusionExecCount);
- switch (opts::AlignMacroOpFusion) {
- case MFT_NONE:
- BC.outs() << ". Use -align-macro-fusion to fix.\n";
- break;
- case MFT_HOT:
- BC.outs() << ". Will fix instances on a hot path.\n";
- break;
- case MFT_ALL:
- BC.outs() << " that are going to be fixed\n";
- break;
- }
- }
-
// Collect and print information about suboptimal code layout on input.
if (opts::ReportBadLayout) {
std::vector<BinaryFunction *> SuboptimalFuncs;
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 1a3a8af..e2925f4 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -76,7 +76,6 @@ extern cl::opt<bool> X86AlignBranchWithin32BBoundaries;
namespace opts {
-extern cl::opt<MacroFusionType> AlignMacroOpFusion;
extern cl::list<std::string> HotTextMoveSections;
extern cl::opt<bool> Hugify;
extern cl::opt<bool> Instrument;
@@ -1972,12 +1971,6 @@ void RewriteInstance::adjustCommandLineOptions() {
if (RuntimeLibrary *RtLibrary = BC->getRuntimeLibrary())
RtLibrary->adjustCommandLineOptions(*BC);
- if (opts::AlignMacroOpFusion != MFT_NONE && !BC->isX86()) {
- BC->outs()
- << "BOLT-INFO: disabling -align-macro-fusion on non-x86 platform\n";
- opts::AlignMacroOpFusion = MFT_NONE;
- }
-
if (BC->isX86() && BC->MAB->allowAutoPadding()) {
if (!BC->HasRelocations) {
BC->errs()
@@ -1988,13 +1981,6 @@ void RewriteInstance::adjustCommandLineOptions() {
BC->outs()
<< "BOLT-WARNING: using mitigation for Intel JCC erratum, layout "
"may take several minutes\n";
- opts::AlignMacroOpFusion = MFT_NONE;
- }
-
- if (opts::AlignMacroOpFusion != MFT_NONE && !BC->HasRelocations) {
- BC->outs() << "BOLT-INFO: disabling -align-macro-fusion in non-relocation "
- "mode\n";
- opts::AlignMacroOpFusion = MFT_NONE;
}
if (opts::SplitEH && !BC->HasRelocations) {
@@ -2016,14 +2002,6 @@ void RewriteInstance::adjustCommandLineOptions() {
opts::StrictMode = true;
}
- if (BC->isX86() && BC->HasRelocations &&
- opts::AlignMacroOpFusion == MFT_HOT && !ProfileReader) {
- BC->outs()
- << "BOLT-INFO: enabling -align-macro-fusion=all since no profile "
- "was specified\n";
- opts::AlignMacroOpFusion = MFT_ALL;
- }
-
if (!BC->HasRelocations &&
opts::ReorderFunctions != ReorderFunctions::RT_NONE) {
BC->errs() << "BOLT-ERROR: function reordering only works when "
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index a74eda8..a21cf8f 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -141,10 +141,6 @@ public:
*AArch64ExprB.getSubExpr(), Comp);
}
- bool isMacroOpFusionPair(ArrayRef<MCInst> Insts) const override {
- return false;
- }
-
bool shortenInstruction(MCInst &, const MCSubtargetInfo &) const override {
return false;
}
diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
index e350e70..263e642 100644
--- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
+++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
@@ -661,40 +661,6 @@ public:
return (Desc.TSFlags & X86II::EncodingMask) == X86II::EVEX;
}
- bool isMacroOpFusionPair(ArrayRef<MCInst> Insts) const override {
- const auto *I = Insts.begin();
- while (I != Insts.end() && isPrefix(*I))
- ++I;
- if (I == Insts.end())
- return false;
-
- const MCInst &FirstInst = *I;
- ++I;
- while (I != Insts.end() && isPrefix(*I))
- ++I;
- if (I == Insts.end())
- return false;
- const MCInst &SecondInst = *I;
-
- if (!isConditionalBranch(SecondInst))
- return false;
- // Cannot fuse if the first instruction uses RIP-relative memory.
- if (hasPCRelOperand(FirstInst))
- return false;
-
- const X86::FirstMacroFusionInstKind CmpKind =
- X86::classifyFirstOpcodeInMacroFusion(FirstInst.getOpcode());
- if (CmpKind == X86::FirstMacroFusionInstKind::Invalid)
- return false;
-
- X86::CondCode CC = static_cast<X86::CondCode>(getCondCode(SecondInst));
- X86::SecondMacroFusionInstKind BranchKind =
- X86::classifySecondCondCodeInMacroFusion(CC);
- if (BranchKind == X86::SecondMacroFusionInstKind::Invalid)
- return false;
- return X86::isMacroFused(CmpKind, BranchKind);
- }
-
std::optional<X86MemOperand>
evaluateX86MemoryOperand(const MCInst &Inst) const override {
int MemOpNo = getMemoryOperandNo(Inst);