aboutsummaryrefslogtreecommitdiff
path: root/llvm
diff options
context:
space:
mode:
authorDingdWang <wdd12358@gmail.com>2025-07-25 22:45:01 +0800
committerGitHub <noreply@github.com>2025-07-25 16:45:01 +0200
commit0c6784c9514d0ddb257bf0fd797969e0ae602882 (patch)
tree31b9455d60c01bb02f387b8a288359386cf6e50f /llvm
parent74502168c4408404b2205838d742b930a4e59f90 (diff)
downloadllvm-0c6784c9514d0ddb257bf0fd797969e0ae602882.zip
llvm-0c6784c9514d0ddb257bf0fd797969e0ae602882.tar.gz
llvm-0c6784c9514d0ddb257bf0fd797969e0ae602882.tar.bz2
[MemDep] Optimize SortNonLocalDepInfoCache sorting strategy for large caches with few unsorted entries (#143107)
During compilation of large files with many branches, I observed that the function `SortNonLocalDepInfoCache` in `MemoryDependenceAnalysis` becomes a significant performance bottleneck. This is because `Cache.size()` can be very large (around 20,000), but only a small number of entries (approximately 5 to 8) actually need sorting. The original implementation performs a full sort in all cases, which is inefficient. This patch introduces a lightweight heuristic to quickly estimate the number of unsorted entries and choose a more efficient sorting method accordingly. As a result, the GVN pass runtime on a large file is reduced from approximately 26.3 minutes to 16.5 minutes.
Diffstat (limited to 'llvm')
-rw-r--r--llvm/lib/Analysis/MemoryDependenceAnalysis.cpp44
1 files changed, 24 insertions, 20 deletions
diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index 3aa9909..2b0f212 100644
--- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -983,33 +983,37 @@ MemDepResult MemoryDependenceResults::getNonLocalInfoForBlock(
static void
SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache,
unsigned NumSortedEntries) {
- switch (Cache.size() - NumSortedEntries) {
- case 0:
- // done, no new entries.
- break;
- case 2: {
- // Two new entries, insert the last one into place.
- NonLocalDepEntry Val = Cache.back();
- Cache.pop_back();
- MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
- std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
- Cache.insert(Entry, Val);
- [[fallthrough]];
+
+ // If only one entry, don't sort.
+ if (Cache.size() < 2)
+ return;
+
+ unsigned s = Cache.size() - NumSortedEntries;
+
+ // If the cache is already sorted, don't sort it again.
+ if (s == 0)
+ return;
+
+ // If no entry is sorted, sort the whole cache.
+ if (NumSortedEntries == 0) {
+ llvm::sort(Cache);
+ return;
}
- case 1:
- // One new entry, Just insert the new value at the appropriate position.
- if (Cache.size() != 1) {
+
+ // If the number of unsorted entires is small and the cache size is big, using
+ // insertion sort is faster. Here use Log2_32 to quickly choose the sort
+ // method.
+ if (s < Log2_32(Cache.size())) {
+ while (s > 0) {
NonLocalDepEntry Val = Cache.back();
Cache.pop_back();
MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
- llvm::upper_bound(Cache, Val);
+ std::upper_bound(Cache.begin(), Cache.end() - s + 1, Val);
Cache.insert(Entry, Val);
+ s--;
}
- break;
- default:
- // Added many values, do a full scale sort.
+ } else {
llvm::sort(Cache);
- break;
}
}