aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorXuan Zhang <144393379+xuanzh-meta@users.noreply.github.com>2024-06-18 10:13:05 -0400
committerGitHub <noreply@github.com>2024-06-18 07:13:05 -0700
commitd9a00ed3668803d11675b103fe9b6ed077ddc4c1 (patch)
treeb07e202a7ecf35058895ffcfb8d6aad984c2106c
parent66959ff8633684ec285d62ac244d38bfc8b263da (diff)
downloadllvm-d9a00ed3668803d11675b103fe9b6ed077ddc4c1.zip
llvm-d9a00ed3668803d11675b103fe9b6ed077ddc4c1.tar.gz
llvm-d9a00ed3668803d11675b103fe9b6ed077ddc4c1.tar.bz2
[MachineOutliner] Leaf Descendants (#90275)
This PR depends on https://github.com/llvm/llvm-project/pull/90264 In the current implementation, only leaf children of each internal node in the suffix tree are included as candidates for outlining. But all leaf descendants are outlining candidates, which we include in the new implementation. This is enabled on a flag `outliner-leaf-descendants` which is default to be true. The reason for _enabling this on a flag_ is because machine outliner is not the only pass that uses suffix tree. The reason for _having this default to be true_ is because including all leaf descendants show consistent size win. * For Clang/LLD, it shows around 3% reduction in text segment size when compared to the baseline `-Oz` linker binary. * For selected benchmark tests in LLVM test suite | run (CTMark/) | only leaf children | all leaf descendants | reduction % | |------------------|--------------------|----------------------|-------------| | lencod | 349624 | 348564 | -0.2004% | | SPASS | 219672 | 218440 | -0.4738% | | kc | 271956 | 250068 | -0.4506% | | sqlite3 | 223920 | 222484 | -0.5471% | | 7zip-benchmark | 405364 | 401244 | -0.3428% | | bullet | 139820 | 138340 | -0.8315% | | consumer-typeset | 295684 | 286628 | -1.2295% | | pairlocalalign | 72236 | 71936 | -0.2164% | | tramp3d-v4 | 189572 | 183676 | -2.9668% | This is part of an enhanced version of machine outliner -- see [RFC](https://discourse.llvm.org/t/rfc-enhanced-machine-outliner-part-1-fulllto-part-2-thinlto-nolto-to-come/78732).
-rw-r--r--llvm/include/llvm/Support/SuffixTree.h34
-rw-r--r--llvm/include/llvm/Support/SuffixTreeNode.h25
-rw-r--r--llvm/lib/CodeGen/MachineOutliner.cpp8
-rw-r--r--llvm/lib/Support/SuffixTree.cpp105
-rw-r--r--llvm/lib/Support/SuffixTreeNode.cpp5
-rw-r--r--llvm/test/CodeGen/AArch64/machine-outliner-cfi-tail-some.mir2
-rw-r--r--llvm/test/CodeGen/AArch64/machine-outliner-leaf-descendants.ll124
-rw-r--r--llvm/test/CodeGen/AArch64/machine-outliner-overlap.mir39
-rw-r--r--llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-sp-mod.mir2
-rw-r--r--llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-thunk.ll4
-rw-r--r--llvm/test/CodeGen/AArch64/machine-outliner-throw2.ll4
-rw-r--r--llvm/test/CodeGen/AArch64/machine-outliner-thunk.ll2
-rw-r--r--llvm/test/CodeGen/AArch64/machine-outliner.mir2
-rw-r--r--llvm/test/CodeGen/RISCV/machineoutliner-pcrel-lo.mir8
-rw-r--r--llvm/unittests/Support/SuffixTreeTest.cpp134
15 files changed, 462 insertions, 36 deletions
diff --git a/llvm/include/llvm/Support/SuffixTree.h b/llvm/include/llvm/Support/SuffixTree.h
index 4940fbb..37b7366 100644
--- a/llvm/include/llvm/Support/SuffixTree.h
+++ b/llvm/include/llvm/Support/SuffixTree.h
@@ -42,6 +42,9 @@ public:
/// Each element is an integer representing an instruction in the module.
ArrayRef<unsigned> Str;
+ /// Whether to consider leaf descendants or only leaf children.
+ bool OutlinerLeafDescendants;
+
/// A repeated substring in the tree.
struct RepeatedSubstring {
/// The length of the string.
@@ -130,11 +133,27 @@ private:
/// this step.
unsigned extend(unsigned EndIdx, unsigned SuffixesToAdd);
+ /// This vector contains all leaf nodes of this suffix tree. These leaf nodes
+ /// are identified using post-order depth-first traversal, so that the order
+ /// of these leaf nodes in the vector matches the order of the leaves in the
+ /// tree from left to right if one were to draw the tree on paper.
+ std::vector<SuffixTreeLeafNode *> LeafNodes;
+
+ /// Perform a post-order depth-first traversal of the tree and perform two
+ /// tasks during the traversal. The first is to populate LeafNodes, adding
+ /// nodes in order of the traversal. The second is to keep track of the leaf
+ /// descendants of every internal node by assigning values to LeftLeafIndex
+ /// and RightLefIndex fields of SuffixTreeNode for all internal nodes.
+ void setLeafNodes();
+
public:
/// Construct a suffix tree from a sequence of unsigned integers.
///
/// \param Str The string to construct the suffix tree for.
- SuffixTree(const ArrayRef<unsigned> &Str);
+ /// \param OutlinerLeafDescendants Whether to consider leaf descendants or
+ /// only leaf children (used by Machine Outliner).
+ SuffixTree(const ArrayRef<unsigned> &Str,
+ bool OutlinerLeafDescendants = false);
/// Iterator for finding all repeated substrings in the suffix tree.
struct RepeatedSubstringIterator {
@@ -154,6 +173,12 @@ public:
/// instruction lengths.
const unsigned MinLength = 2;
+ /// Vector of leaf nodes of the suffix tree.
+ const std::vector<SuffixTreeLeafNode *> &LeafNodes;
+
+ /// Whether to consider leaf descendants or only leaf children.
+ bool OutlinerLeafDescendants = !LeafNodes.empty();
+
/// Move the iterator to the next repeated substring.
void advance();
@@ -179,7 +204,10 @@ public:
return !(*this == Other);
}
- RepeatedSubstringIterator(SuffixTreeInternalNode *N) : N(N) {
+ RepeatedSubstringIterator(
+ SuffixTreeInternalNode *N,
+ const std::vector<SuffixTreeLeafNode *> &LeafNodes = {})
+ : N(N), LeafNodes(LeafNodes) {
// Do we have a non-null node?
if (!N)
return;
@@ -191,7 +219,7 @@ public:
};
typedef RepeatedSubstringIterator iterator;
- iterator begin() { return iterator(Root); }
+ iterator begin() { return iterator(Root, LeafNodes); }
iterator end() { return iterator(nullptr); }
};
diff --git a/llvm/include/llvm/Support/SuffixTreeNode.h b/llvm/include/llvm/Support/SuffixTreeNode.h
index 7d0d1cf..84b590f 100644
--- a/llvm/include/llvm/Support/SuffixTreeNode.h
+++ b/llvm/include/llvm/Support/SuffixTreeNode.h
@@ -46,6 +46,17 @@ private:
/// the root to this node.
unsigned ConcatLen = 0;
+ /// These two indices give a range of indices for its leaf descendants.
+ /// Imagine drawing a tree on paper and assigning a unique index to each leaf
+ /// node in monotonically increasing order from left to right. This way of
+ /// numbering the leaf nodes allows us to associate a continuous range of
+ /// indices with each internal node. For example, if a node has leaf
+ /// descendants with indices i, i+1, ..., j, then its LeftLeafIdx is i and
+ /// its RightLeafIdx is j. These indices are for LeafNodes in the SuffixTree
+ /// class, which is constructed using post-order depth-first traversal.
+ unsigned LeftLeafIdx = EmptyIdx;
+ unsigned RightLeafIdx = EmptyIdx;
+
public:
// LLVM RTTI boilerplate.
NodeKind getKind() const { return Kind; }
@@ -56,6 +67,18 @@ public:
/// \returns the end index of this node.
virtual unsigned getEndIdx() const = 0;
+ /// \return the index of this node's left most leaf node.
+ unsigned getLeftLeafIdx() const;
+
+ /// \return the index of this node's right most leaf node.
+ unsigned getRightLeafIdx() const;
+
+ /// Set the index of the left most leaf node of this node to \p Idx.
+ void setLeftLeafIdx(unsigned Idx);
+
+ /// Set the index of the right most leaf node of this node to \p Idx.
+ void setRightLeafIdx(unsigned Idx);
+
/// Advance this node's StartIdx by \p Inc.
void incrementStartIdx(unsigned Inc);
@@ -168,4 +191,4 @@ public:
virtual ~SuffixTreeLeafNode() = default;
};
} // namespace llvm
-#endif // LLVM_SUPPORT_SUFFIXTREE_NODE_H \ No newline at end of file
+#endif // LLVM_SUPPORT_SUFFIXTREE_NODE_H
diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp
index 9e2e631..6836f6c 100644
--- a/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -121,6 +121,12 @@ static cl::opt<unsigned> OutlinerBenefitThreshold(
cl::desc(
"The minimum size in bytes before an outlining candidate is accepted"));
+static cl::opt<bool> OutlinerLeafDescendants(
+ "outliner-leaf-descendants", cl::init(true), cl::Hidden,
+ cl::desc("Consider all leaf descendants of internal nodes of the suffix "
+ "tree as candidates for outlining (if false, only leaf children "
+ "are considered)"));
+
namespace {
/// Maps \p MachineInstrs to unsigned integers and stores the mappings.
@@ -576,7 +582,7 @@ void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) {
void MachineOutliner::findCandidates(
InstructionMapper &Mapper, std::vector<OutlinedFunction> &FunctionList) {
FunctionList.clear();
- SuffixTree ST(Mapper.UnsignedVec);
+ SuffixTree ST(Mapper.UnsignedVec, OutlinerLeafDescendants);
// First, find all of the repeated substrings in the tree of minimum length
// 2.
diff --git a/llvm/lib/Support/SuffixTree.cpp b/llvm/lib/Support/SuffixTree.cpp
index c00c798..5e58310 100644
--- a/llvm/lib/Support/SuffixTree.cpp
+++ b/llvm/lib/Support/SuffixTree.cpp
@@ -26,7 +26,9 @@ static size_t numElementsInSubstring(const SuffixTreeNode *N) {
return N->getEndIdx() - N->getStartIdx() + 1;
}
-SuffixTree::SuffixTree(const ArrayRef<unsigned> &Str) : Str(Str) {
+SuffixTree::SuffixTree(const ArrayRef<unsigned> &Str,
+ bool OutlinerLeafDescendants)
+ : Str(Str), OutlinerLeafDescendants(OutlinerLeafDescendants) {
Root = insertRoot();
Active.Node = Root;
@@ -46,6 +48,11 @@ SuffixTree::SuffixTree(const ArrayRef<unsigned> &Str) : Str(Str) {
// Set the suffix indices of each leaf.
assert(Root && "Root node can't be nullptr!");
setSuffixIndices();
+
+ // Collect all leaf nodes of the suffix tree. And for each internal node,
+ // record the range of leaf nodes that are descendants of it.
+ if (OutlinerLeafDescendants)
+ setLeafNodes();
}
SuffixTreeNode *SuffixTree::insertLeaf(SuffixTreeInternalNode &Parent,
@@ -105,6 +112,70 @@ void SuffixTree::setSuffixIndices() {
}
}
+void SuffixTree::setLeafNodes() {
+ // A stack that keeps track of nodes to visit for post-order DFS traversal.
+ SmallVector<SuffixTreeNode *> ToVisit;
+ ToVisit.push_back(Root);
+
+ // This keeps track of the index of the next leaf node to be added to
+ // the LeafNodes vector of the suffix tree.
+ unsigned LeafCounter = 0;
+
+ // This keeps track of nodes whose children have been added to the stack.
+ // The value is a pair, representing a node's first and last children.
+ DenseMap<SuffixTreeInternalNode *,
+ std::pair<SuffixTreeNode *, SuffixTreeNode *>>
+ ChildrenMap;
+
+ // Traverse the tree in post-order.
+ while (!ToVisit.empty()) {
+ SuffixTreeNode *CurrNode = ToVisit.pop_back_val();
+ if (auto *CurrInternalNode = dyn_cast<SuffixTreeInternalNode>(CurrNode)) {
+ // The current node is an internal node.
+ auto I = ChildrenMap.find(CurrInternalNode);
+ if (I == ChildrenMap.end()) {
+ // This is the first time we visit this node.
+ // Its children have not been added to the stack yet.
+ // We add current node back, and add its children to the stack.
+ // We keep track of the first and last children of the current node.
+ auto J = CurrInternalNode->Children.begin();
+ if (J != CurrInternalNode->Children.end()) {
+ ToVisit.push_back(CurrNode);
+ SuffixTreeNode *FirstChild = J->second;
+ SuffixTreeNode *LastChild = nullptr;
+ for (; J != CurrInternalNode->Children.end(); ++J) {
+ LastChild = J->second;
+ ToVisit.push_back(LastChild);
+ }
+ ChildrenMap[CurrInternalNode] = {FirstChild, LastChild};
+ }
+ } else {
+ // This is the second time we visit this node.
+ // All of its children have already been processed.
+ // Now, we can set its LeftLeafIdx and RightLeafIdx;
+ auto [FirstChild, LastChild] = I->second;
+ // Get the first child to use its RightLeafIdx.
+ // The first child is the first one added to the stack, so it is
+ // the last one to be processed. Hence, the leaf descendants
+ // of the first child are assigned the largest index numbers.
+ CurrNode->setRightLeafIdx(FirstChild->getRightLeafIdx());
+ // Get the last child to use its LeftLeafIdx.
+ CurrNode->setLeftLeafIdx(LastChild->getLeftLeafIdx());
+ assert(CurrNode->getLeftLeafIdx() <= CurrNode->getRightLeafIdx() &&
+ "LeftLeafIdx should not be larger than RightLeafIdx");
+ }
+ } else {
+ // The current node is a leaf node.
+ // We can simply set its LeftLeafIdx and RightLeafIdx.
+ CurrNode->setLeftLeafIdx(LeafCounter);
+ CurrNode->setRightLeafIdx(LeafCounter);
+ ++LeafCounter;
+ auto *CurrLeafNode = cast<SuffixTreeLeafNode>(CurrNode);
+ LeafNodes.push_back(CurrLeafNode);
+ }
+ }
+}
+
unsigned SuffixTree::extend(unsigned EndIdx, unsigned SuffixesToAdd) {
SuffixTreeInternalNode *NeedsLink = nullptr;
@@ -241,30 +312,34 @@ void SuffixTree::RepeatedSubstringIterator::advance() {
// it's too short, we'll quit.
unsigned Length = Curr->getConcatLen();
- // Iterate over each child, saving internal nodes for visiting, and
- // leaf nodes' SuffixIdx in RepeatedSubstringStarts. Internal nodes
- // represent individual strings, which may repeat.
- for (auto &ChildPair : Curr->Children) {
+ // Iterate over each child, saving internal nodes for visiting.
+ // Internal nodes represent individual strings, which may repeat.
+ for (auto &ChildPair : Curr->Children)
// Save all of this node's children for processing.
if (auto *InternalChild =
- dyn_cast<SuffixTreeInternalNode>(ChildPair.second)) {
+ dyn_cast<SuffixTreeInternalNode>(ChildPair.second))
InternalNodesToVisit.push_back(InternalChild);
- continue;
- }
-
- if (Length < MinLength)
- continue;
- // Have an occurrence of a potentially repeated string. Save it.
- auto *Leaf = cast<SuffixTreeLeafNode>(ChildPair.second);
- RepeatedSubstringStarts.push_back(Leaf->getSuffixIdx());
- }
+ // If length of repeated substring is below threshold, then skip it.
+ if (Length < MinLength)
+ continue;
// The root never represents a repeated substring. If we're looking at
// that, then skip it.
if (Curr->isRoot())
continue;
+ // Collect leaf children or leaf descendants by OutlinerLeafDescendants.
+ if (OutlinerLeafDescendants) {
+ for (unsigned I = Curr->getLeftLeafIdx(); I <= Curr->getRightLeafIdx();
+ ++I)
+ RepeatedSubstringStarts.push_back(LeafNodes[I]->getSuffixIdx());
+ } else {
+ for (auto &ChildPair : Curr->Children)
+ if (auto *Leaf = dyn_cast<SuffixTreeLeafNode>(ChildPair.second))
+ RepeatedSubstringStarts.push_back(Leaf->getSuffixIdx());
+ }
+
// Do we have any repeated substrings?
if (RepeatedSubstringStarts.size() < 2)
continue;
diff --git a/llvm/lib/Support/SuffixTreeNode.cpp b/llvm/lib/Support/SuffixTreeNode.cpp
index 113b990..9f1f94a 100644
--- a/llvm/lib/Support/SuffixTreeNode.cpp
+++ b/llvm/lib/Support/SuffixTreeNode.cpp
@@ -38,3 +38,8 @@ unsigned SuffixTreeLeafNode::getEndIdx() const {
unsigned SuffixTreeLeafNode::getSuffixIdx() const { return SuffixIdx; }
void SuffixTreeLeafNode::setSuffixIdx(unsigned Idx) { SuffixIdx = Idx; }
+
+unsigned SuffixTreeNode::getLeftLeafIdx() const { return LeftLeafIdx; }
+unsigned SuffixTreeNode::getRightLeafIdx() const { return RightLeafIdx; }
+void SuffixTreeNode::setLeftLeafIdx(unsigned Idx) { LeftLeafIdx = Idx; }
+void SuffixTreeNode::setRightLeafIdx(unsigned Idx) { RightLeafIdx = Idx; }
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-cfi-tail-some.mir b/llvm/test/CodeGen/AArch64/machine-outliner-cfi-tail-some.mir
index 67d4119..3afa1d5 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner-cfi-tail-some.mir
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-cfi-tail-some.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=aarch64-apple-unknown -run-pass=machine-outliner -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple=aarch64-apple-unknown -run-pass=machine-outliner -verify-machineinstrs -outliner-leaf-descendants=false %s -o - | FileCheck %s
# Outlining CFI instructions is unsafe if we cannot outline all of the CFI
# instructions from a function. This shows that we choose not to outline the
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-leaf-descendants.ll b/llvm/test/CodeGen/AArch64/machine-outliner-leaf-descendants.ll
new file mode 100644
index 0000000..d78eea7
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-leaf-descendants.ll
@@ -0,0 +1,124 @@
+; This test is mainly for the -outliner-leaf-descendants flag for MachineOutliner.
+;
+; ===================== -outliner-leaf-descendants=false =====================
+; MachineOutliner finds THREE key `OutlinedFunction` and outlines them. They are:
+; ```
+; mov w0, #1
+; mov w1, #2
+; mov w2, #3
+; mov w3, #4
+; mov w4, #5
+; mov w5, #6 or #7 or #8
+; b
+; ```
+; Each has:
+; - `SequenceSize=28` and `OccurrenceCount=2`
+; - each Candidate has `CallOverhead=4` and `FrameOverhead=0`
+; - `NotOutlinedCost=28*2=56` and `OutliningCost=4*2+28+0=36`
+; - `Benefit=56-36=20` and `Priority=56/36=1.56`
+;
+; ===================== -outliner-leaf-descendants=true =====================
+; MachineOutliner finds a FOURTH key `OutlinedFunction`, which is:
+; ```
+; mov w0, #1
+; mov w1, #2
+; mov w2, #3
+; mov w3, #4
+; mov w4, #5
+; ```
+; This corresponds to an internal node that has ZERO leaf children, but SIX leaf descendants.
+; It has:
+; - `SequenceSize=20` and `OccurrenceCount=6`
+; - each Candidate has `CallOverhead=12` and `FrameOverhead=4`
+; - `NotOutlinedCost=20*6=120` and `OutliningCost=12*6+20+4=96`
+; - `Benefit=120-96=24` and `Priority=120/96=1.25`
+;
+; The FOURTH `OutlinedFunction` has lower _priority_ compared to the first THREE `OutlinedFunction`.
+; Hence, we use `-outliner-benefit-threshold=22` to check if the FOURTH `OutlinedFunction` is identified.
+
+; RUN: llc %s -enable-machine-outliner=always -outliner-leaf-descendants=false -filetype=obj -o %t
+; RUN: llvm-objdump -d %t | FileCheck %s --check-prefix=CHECK-BASELINE
+
+; RUN: llc %s -enable-machine-outliner=always -outliner-leaf-descendants=false -outliner-benefit-threshold=22 -filetype=obj -o %t
+; RUN: llvm-objdump -d %t | FileCheck %s --check-prefix=CHECK-NO-CANDIDATE
+
+; RUN: llc %s -enable-machine-outliner=always -outliner-leaf-descendants=true -filetype=obj -o %t
+; RUN: llvm-objdump -d %t | FileCheck %s --check-prefix=CHECK-BASELINE
+
+; RUN: llc %s -enable-machine-outliner=always -outliner-leaf-descendants=true -outliner-benefit-threshold=22 -filetype=obj -o %t
+; RUN: llvm-objdump -d %t | FileCheck %s --check-prefix=CHECK-LEAF-DESCENDANTS
+
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-macosx14.0.0"
+
+declare i32 @_Z3fooiiii(i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef)
+
+define i32 @_Z2f1v() minsize {
+ %1 = tail call i32 @_Z3fooiiii(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 6)
+ ret i32 %1
+}
+
+define i32 @_Z2f2v() minsize {
+ %1 = tail call i32 @_Z3fooiiii(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 6)
+ ret i32 %1
+}
+
+define i32 @_Z2f3v() minsize {
+ %1 = tail call i32 @_Z3fooiiii(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 7)
+ ret i32 %1
+}
+
+define i32 @_Z2f4v() minsize {
+ %1 = tail call i32 @_Z3fooiiii(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 7)
+ ret i32 %1
+}
+
+define i32 @_Z2f5v() minsize {
+ %1 = tail call i32 @_Z3fooiiii(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 8)
+ ret i32 %1
+}
+
+define i32 @_Z2f6v() minsize {
+ %1 = tail call i32 @_Z3fooiiii(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 8)
+ ret i32 %1
+}
+
+; CHECK-BASELINE: <_OUTLINED_FUNCTION_0>:
+; CHECK-BASELINE-NEXT: mov w0, #0x1
+; CHECK-BASELINE-NEXT: mov w1, #0x2
+; CHECK-BASELINE-NEXT: mov w2, #0x3
+; CHECK-BASELINE-NEXT: mov w3, #0x4
+; CHECK-BASELINE-NEXT: mov w4, #0x5
+; CHECK-BASELINE-NEXT: mov w5, #0x6
+; CHECK-BASELINE-NEXT: b
+
+; CHECK-BASELINE: <_OUTLINED_FUNCTION_1>:
+; CHECK-BASELINE-NEXT: mov w0, #0x1
+; CHECK-BASELINE-NEXT: mov w1, #0x2
+; CHECK-BASELINE-NEXT: mov w2, #0x3
+; CHECK-BASELINE-NEXT: mov w3, #0x4
+; CHECK-BASELINE-NEXT: mov w4, #0x5
+; CHECK-BASELINE-NEXT: mov w5, #0x8
+; CHECK-BASELINE-NEXT: b
+
+; CHECK-BASELINE: <_OUTLINED_FUNCTION_2>:
+; CHECK-BASELINE-NEXT: mov w0, #0x1
+; CHECK-BASELINE-NEXT: mov w1, #0x2
+; CHECK-BASELINE-NEXT: mov w2, #0x3
+; CHECK-BASELINE-NEXT: mov w3, #0x4
+; CHECK-BASELINE-NEXT: mov w4, #0x5
+; CHECK-BASELINE-NEXT: mov w5, #0x7
+; CHECK-BASELINE-NEXT: b
+
+; CHECK-LEAF-DESCENDANTS: <_OUTLINED_FUNCTION_0>:
+; CHECK-LEAF-DESCENDANTS-NEXT: mov w0, #0x1
+; CHECK-LEAF-DESCENDANTS-NEXT: mov w1, #0x2
+; CHECK-LEAF-DESCENDANTS-NEXT: mov w2, #0x3
+; CHECK-LEAF-DESCENDANTS-NEXT: mov w3, #0x4
+; CHECK-LEAF-DESCENDANTS-NEXT: mov w4, #0x5
+; CHECK-LEAF-DESCENDANTS-NEXT: ret
+
+; CHECK-LEAF-DESCENDANTS-NOT: <_OUTLINED_FUNCTION_1>:
+
+; CHECK-NO-CANDIDATE-NOT: <_OUTLINED_FUNCTION_0>:
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-overlap.mir b/llvm/test/CodeGen/AArch64/machine-outliner-overlap.mir
index c6bd4c1..48a97b6 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner-overlap.mir
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-overlap.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --include-generated-funcs
-# RUN: llc %s -mtriple aarch64 -debug-only=machine-outliner -run-pass=machine-outliner -o - 2>&1 | FileCheck %s
+# RUN: llc %s -mtriple aarch64 -outliner-leaf-descendants=false -debug-only=machine-outliner -run-pass=machine-outliner -o - 2>&1 | FileCheck %s
+# RUN: llc %s -mtriple aarch64 -debug-only=machine-outliner -run-pass=machine-outliner -o - 2>&1 | FileCheck %s --check-prefix=CHECK-LEAF
# REQUIRES: asserts
# CHECK: *** Discarding overlapping candidates ***
@@ -54,7 +55,39 @@ body: |
; CHECK-NEXT: BL @OUTLINED_FUNCTION_0, implicit-def $lr, implicit $sp, implicit-def $lr, implicit-def $x8, implicit-def $x9, implicit $sp, implicit $x0, implicit $x9
; CHECK-NEXT: RET undef $x9
- ; fixme: outline!
+ ; CHECK-LABEL: name: OUTLINED_FUNCTION_0
+ ; CHECK: liveins: $x0, $x9, $lr
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x8 = ADDXri $x0, 3, 0
+ ; CHECK-NEXT: $x9 = ADDXri $x9, 16, 0
+ ; CHECK-NEXT: $x9 = ADDXri $x9, 16, 0
+ ; CHECK-NEXT: $x9 = ADDXri $x9, 16, 0
+ ; CHECK-NEXT: $x9 = ADDXri $x9, 16, 0
+ ; CHECK-NEXT: $x9 = ADDXri $x9, 16, 0
+ ; CHECK-NEXT: $x9 = ADDXri $x9, 16, 0
+ ; CHECK-NEXT: RET $lr
+
+ ; CHECK-LEAF-LABEL: name: overlap
+ ; CHECK-LEAF: liveins: $x0, $x9
+ ; CHECK-LEAF-NEXT: {{ $}}
+ ; CHECK-LEAF-NEXT: BL @OUTLINED_FUNCTION_0
+ ; CHECK-LEAF-NEXT: BL @OUTLINED_FUNCTION_0
+ ; CHECK-LEAF-NEXT: $x8 = ADDXri $x0, 3, 0
+ ; CHECK-LEAF-NEXT: BL @OUTLINED_FUNCTION_0
+ ; CHECK-LEAF-NEXT: BL @OUTLINED_FUNCTION_0
+ ; CHECK-LEAF-NEXT: $x8 = ADDXri $x0, 3, 0
+ ; CHECK-LEAF-NEXT: BL @OUTLINED_FUNCTION_0
+ ; CHECK-LEAF-NEXT: BL @OUTLINED_FUNCTION_0
+ ; CHECK-LEAF-NEXT: RET undef $x9
+
+ ; CHECK-LEAF-LABEL: name: OUTLINED_FUNCTION_0
+ ; CHECK-LEAF: liveins: $x0, $x9, $lr
+ ; CHECK-LEAF-NEXT: {{ $}}
+ ; CHECK-LEAF-NEXT: $x9 = ADDXri $x9, 16, 0
+ ; CHECK-LEAF-NEXT: $x9 = ADDXri $x9, 16, 0
+ ; CHECK-LEAF-NEXT: $x9 = ADDXri $x9, 16, 0
+ ; CHECK-LEAF-NEXT: RET $lr
+
$x9 = ADDXri $x9, 16, 0
$x9 = ADDXri $x9, 16, 0
$x9 = ADDXri $x9, 16, 0
@@ -64,7 +97,6 @@ body: |
$x8 = ADDXri $x0, 3, 0
- ; outline
$x9 = ADDXri $x9, 16, 0
$x9 = ADDXri $x9, 16, 0
$x9 = ADDXri $x9, 16, 0
@@ -74,7 +106,6 @@ body: |
$x8 = ADDXri $x0, 3, 0
- ; outline
$x9 = ADDXri $x9, 16, 0
$x9 = ADDXri $x9, 16, 0
$x9 = ADDXri $x9, 16, 0
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-sp-mod.mir b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-sp-mod.mir
index c1c2720..22e5ede 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-sp-mod.mir
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-sp-mod.mir
@@ -1,4 +1,4 @@
-# RUN: llc -verify-machineinstrs -run-pass=machine-outliner -run-pass=aarch64-ptrauth %s -o - | FileCheck %s
+# RUN: llc -verify-machineinstrs -run-pass=machine-outliner -run-pass=aarch64-ptrauth -outliner-leaf-descendants=false %s -o - | FileCheck %s
--- |
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-thunk.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-thunk.ll
index 9250718..618973b 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-thunk.ll
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-thunk.ll
@@ -1,6 +1,6 @@
-; RUN: llc -mtriple aarch64-arm-linux-gnu --enable-machine-outliner \
+; RUN: llc -mtriple aarch64-arm-linux-gnu --enable-machine-outliner -outliner-leaf-descendants=false \
; RUN: -verify-machineinstrs %s -o - | FileCheck --check-prefixes CHECK,V8A %s
-; RUN-V83A: llc -mtriple aarch64 -enable-machine-outliner \
+; RUN-V83A: llc -mtriple aarch64 -enable-machine-outliner -outliner-leaf-descendants=false \
; RUN-V83A: -verify-machineinstrs -mattr=+v8.3a %s -o - > %t
; RUN-V83A: FileCheck --check-prefixes CHECK,V83A < %t %s
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-throw2.ll b/llvm/test/CodeGen/AArch64/machine-outliner-throw2.ll
index aa6e31d..538e116 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner-throw2.ll
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-throw2.ll
@@ -1,5 +1,5 @@
-; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=aarch64 -frame-pointer=non-leaf < %s | FileCheck %s --check-prefix=NOOMIT
-; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=aarch64 -frame-pointer=none < %s | FileCheck %s --check-prefix=OMITFP
+; RUN: llc -verify-machineinstrs -enable-machine-outliner -outliner-leaf-descendants=false -mtriple=aarch64 -frame-pointer=non-leaf < %s | FileCheck %s --check-prefix=NOOMIT
+; RUN: llc -verify-machineinstrs -enable-machine-outliner -outliner-leaf-descendants=false -mtriple=aarch64 -frame-pointer=none < %s | FileCheck %s --check-prefix=OMITFP
define void @_Z1giii(i32 %x, i32 %y, i32 %z) minsize {
; NOOMIT-LABEL: _Z1giii:
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-thunk.ll b/llvm/test/CodeGen/AArch64/machine-outliner-thunk.ll
index 8740aac..7e34adf 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner-thunk.ll
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-thunk.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -enable-machine-outliner -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -enable-machine-outliner -outliner-leaf-descendants=false -verify-machineinstrs | FileCheck %s
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-pc-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner.mir b/llvm/test/CodeGen/AArch64/machine-outliner.mir
index 83eda74..66779ad 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner.mir
+++ b/llvm/test/CodeGen/AArch64/machine-outliner.mir
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple=aarch64--- -run-pass=prologepilog -run-pass=machine-outliner -verify-machineinstrs -frame-pointer=non-leaf %s -o - | FileCheck %s
+# RUN: llc -mtriple=aarch64--- -run-pass=prologepilog -run-pass=machine-outliner -verify-machineinstrs -frame-pointer=non-leaf -outliner-leaf-descendants=false %s -o - | FileCheck %s
--- |
@x = common global i32 0, align 4
diff --git a/llvm/test/CodeGen/RISCV/machineoutliner-pcrel-lo.mir b/llvm/test/CodeGen/RISCV/machineoutliner-pcrel-lo.mir
index 34f7a93..8a83543 100644
--- a/llvm/test/CodeGen/RISCV/machineoutliner-pcrel-lo.mir
+++ b/llvm/test/CodeGen/RISCV/machineoutliner-pcrel-lo.mir
@@ -1,11 +1,11 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=riscv32 -x mir -run-pass=machine-outliner -simplify-mir -verify-machineinstrs < %s \
+# RUN: llc -mtriple=riscv32 -x mir -run-pass=machine-outliner -simplify-mir -verify-machineinstrs -outliner-leaf-descendants=false < %s \
# RUN: | FileCheck %s
-# RUN: llc -mtriple=riscv64 -x mir -run-pass=machine-outliner -simplify-mir -verify-machineinstrs < %s \
+# RUN: llc -mtriple=riscv64 -x mir -run-pass=machine-outliner -simplify-mir -verify-machineinstrs -outliner-leaf-descendants=false < %s \
# RUN: | FileCheck %s
-# RUN: llc -mtriple=riscv32 -x mir -run-pass=machine-outliner -simplify-mir --function-sections -verify-machineinstrs < %s \
+# RUN: llc -mtriple=riscv32 -x mir -run-pass=machine-outliner -simplify-mir --function-sections -verify-machineinstrs -outliner-leaf-descendants=false < %s \
# RUN: | FileCheck -check-prefix=CHECK-FS %s
-# RUN: llc -mtriple=riscv64 -x mir -run-pass=machine-outliner -simplify-mir --function-sections -verify-machineinstrs < %s \
+# RUN: llc -mtriple=riscv64 -x mir -run-pass=machine-outliner -simplify-mir --function-sections -verify-machineinstrs -outliner-leaf-descendants=false < %s \
# RUN: | FileCheck -check-prefix=CHECK-FS %s
--- |
diff --git a/llvm/unittests/Support/SuffixTreeTest.cpp b/llvm/unittests/Support/SuffixTreeTest.cpp
index f5d8112..e8393c0 100644
--- a/llvm/unittests/Support/SuffixTreeTest.cpp
+++ b/llvm/unittests/Support/SuffixTreeTest.cpp
@@ -78,6 +78,7 @@ TEST(SuffixTreeTest, TestLongerRepetition) {
// indices 0 and 1.
//
// FIXME: Add support for detecting {1, 1} and {1, 1, 1}
+// See Test TestSingleCharacterRepeatWithLeafDescendants for the fix
TEST(SuffixTreeTest, TestSingleCharacterRepeat) {
std::vector<unsigned> RepeatedRepetitionData = {1, 1, 1, 1, 1, 1, 2};
std::vector<unsigned>::iterator RRDIt, RRDIt2;
@@ -100,6 +101,35 @@ TEST(SuffixTreeTest, TestSingleCharacterRepeat) {
}
}
+// Tests that the SuffixTree is able to find the following substrings:
+// {1, 1} at indices 0, 1, 2, 3, and 4;
+// {1, 1, 1} at indices 0, 1, 2, and 3;
+// {1, 1, 1, 1} at indices 0, 1, and 2; and
+// {1, 1, 1, 1, 1} at indices 0 and 1.
+//
+// This is the fix for TestSingleCharacterRepeat.
+TEST(SuffixTreeTest, TestSingleCharacterRepeatWithLeafDescendants) {
+ std::vector<unsigned> RepeatedRepetitionData = {1, 1, 1, 1, 1, 1, 2};
+ std::vector<unsigned>::iterator RRDIt, RRDIt2;
+ SuffixTree ST(RepeatedRepetitionData, /*OutlinerLeafDescendants=*/true);
+ std::vector<SuffixTree::RepeatedSubstring> SubStrings;
+ for (auto It = ST.begin(); It != ST.end(); It++)
+ SubStrings.push_back(*It);
+ EXPECT_EQ(SubStrings.size(), 4u);
+ for (SuffixTree::RepeatedSubstring &RS : SubStrings) {
+ EXPECT_EQ(RS.StartIndices.size(),
+ RepeatedRepetitionData.size() - RS.Length);
+ for (unsigned StartIdx : SubStrings[0].StartIndices) {
+ RRDIt = RRDIt2 = RepeatedRepetitionData.begin();
+ std::advance(RRDIt, StartIdx);
+ std::advance(RRDIt2, StartIdx + SubStrings[0].Length);
+ ASSERT_TRUE(
+ all_of(make_range<std::vector<unsigned>::iterator>(RRDIt, RRDIt2),
+ [](unsigned Elt) { return Elt == 1; }));
+ }
+ }
+}
+
// The suffix tree cannot currently find repeated substrings in strings of the
// form {1, 2, 3, 1, 2, 3}, because the two {1, 2, 3}s are adjacent ("tandem
// repeats")
@@ -140,4 +170,108 @@ TEST(SuffixTreeTest, TestExclusion) {
}
}
+// Tests that the SuffixTree is able to find three substrings
+// {1, 2, 3} at indices 6 and 10;
+// {2, 3} at indices 7 and 11; and
+// {1, 2} at indicies 0 and 3.
+//
+// FIXME: {1, 2} has indices 6 and 10 missing as it is a substring of {1, 2, 3}
+// See Test TestSubstringRepeatsWithLeafDescendants for the fix
+TEST(SuffixTreeTest, TestSubstringRepeats) {
+ std::vector<unsigned> RepeatedRepetitionData = {1, 2, 100, 1, 2, 101, 1,
+ 2, 3, 103, 1, 2, 3, 104};
+ SuffixTree ST(RepeatedRepetitionData);
+ std::vector<SuffixTree::RepeatedSubstring> SubStrings;
+ for (auto It = ST.begin(); It != ST.end(); It++)
+ SubStrings.push_back(*It);
+ EXPECT_EQ(SubStrings.size(), 3u);
+ unsigned Len;
+ for (SuffixTree::RepeatedSubstring &RS : SubStrings) {
+ Len = RS.Length;
+ bool IsExpectedLen = (Len == 3u || Len == 2u);
+ ASSERT_TRUE(IsExpectedLen);
+ bool IsExpectedIndex;
+
+ if (Len == 3u) { // {1, 2, 3}
+ EXPECT_EQ(RS.StartIndices.size(), 2u);
+ for (unsigned StartIdx : RS.StartIndices) {
+ IsExpectedIndex = (StartIdx == 6u || StartIdx == 10u);
+ EXPECT_TRUE(IsExpectedIndex);
+ EXPECT_EQ(RepeatedRepetitionData[StartIdx], 1u);
+ EXPECT_EQ(RepeatedRepetitionData[StartIdx + 1], 2u);
+ EXPECT_EQ(RepeatedRepetitionData[StartIdx + 2], 3u);
+ }
+ } else {
+ if (RepeatedRepetitionData[RS.StartIndices[0]] == 1u) { // {1, 2}
+ EXPECT_EQ(RS.StartIndices.size(), 2u);
+ for (unsigned StartIdx : RS.StartIndices) {
+ IsExpectedIndex = (StartIdx == 0u || StartIdx == 3u);
+ EXPECT_TRUE(IsExpectedIndex);
+ EXPECT_EQ(RepeatedRepetitionData[StartIdx + 1], 2u);
+ }
+ } else { // {2, 3}
+ EXPECT_EQ(RS.StartIndices.size(), 2u);
+ for (unsigned StartIdx : RS.StartIndices) {
+ IsExpectedIndex = (StartIdx == 7u || StartIdx == 11u);
+ EXPECT_TRUE(IsExpectedIndex);
+ EXPECT_EQ(RepeatedRepetitionData[StartIdx], 2u);
+ EXPECT_EQ(RepeatedRepetitionData[StartIdx + 1], 3u);
+ }
+ }
+ }
+ }
+}
+
+// Tests that the SuffixTree is able to find three substrings
+// {1, 2, 3} at indices 6 and 10;
+// {2, 3} at indices 7 and 11; and
+// {1, 2} at indicies 0, 3, 6, and 10.
+//
+// This is the fix for TestSubstringRepeats
+TEST(SuffixTreeTest, TestSubstringRepeatsWithLeafDescendants) {
+ std::vector<unsigned> RepeatedRepetitionData = {1, 2, 100, 1, 2, 101, 1,
+ 2, 3, 103, 1, 2, 3, 104};
+ SuffixTree ST(RepeatedRepetitionData, /*OutlinerLeafDescendants=*/true);
+ std::vector<SuffixTree::RepeatedSubstring> SubStrings;
+ for (auto It = ST.begin(); It != ST.end(); It++)
+ SubStrings.push_back(*It);
+ EXPECT_EQ(SubStrings.size(), 3u);
+ unsigned Len;
+ for (SuffixTree::RepeatedSubstring &RS : SubStrings) {
+ Len = RS.Length;
+ bool IsExpectedLen = (Len == 3u || Len == 2u);
+ ASSERT_TRUE(IsExpectedLen);
+ bool IsExpectedIndex;
+
+ if (Len == 3u) { // {1, 2, 3}
+ EXPECT_EQ(RS.StartIndices.size(), 2u);
+ for (unsigned StartIdx : RS.StartIndices) {
+ IsExpectedIndex = (StartIdx == 6u || StartIdx == 10u);
+ EXPECT_TRUE(IsExpectedIndex);
+ EXPECT_EQ(RepeatedRepetitionData[StartIdx], 1u);
+ EXPECT_EQ(RepeatedRepetitionData[StartIdx + 1], 2u);
+ EXPECT_EQ(RepeatedRepetitionData[StartIdx + 2], 3u);
+ }
+ } else {
+ if (RepeatedRepetitionData[RS.StartIndices[0]] == 1u) { // {1, 2}
+ EXPECT_EQ(RS.StartIndices.size(), 4u);
+ for (unsigned StartIdx : RS.StartIndices) {
+ IsExpectedIndex = (StartIdx == 0u || StartIdx == 3u ||
+ StartIdx == 6u || StartIdx == 10u);
+ EXPECT_TRUE(IsExpectedIndex);
+ EXPECT_EQ(RepeatedRepetitionData[StartIdx + 1], 2u);
+ }
+ } else { // {2, 3}
+ EXPECT_EQ(RS.StartIndices.size(), 2u);
+ for (unsigned StartIdx : RS.StartIndices) {
+ IsExpectedIndex = (StartIdx == 7u || StartIdx == 11u);
+ EXPECT_TRUE(IsExpectedIndex);
+ EXPECT_EQ(RepeatedRepetitionData[StartIdx], 2u);
+ EXPECT_EQ(RepeatedRepetitionData[StartIdx + 1], 3u);
+ }
+ }
+ }
+ }
+}
+
} // namespace