aboutsummaryrefslogtreecommitdiff
path: root/bolt
diff options
context:
space:
mode:
authorAmir Ayupov <aaupov@fb.com>2024-01-11 14:35:37 -0800
committerGitHub <noreply@github.com>2024-01-11 14:35:37 -0800
commit8fb8ad66c95a51b82e5c2876ed925b5512ce6b83 (patch)
tree1043a5a9bb1f68115fbd2af7ceaeeef5932fd76f /bolt
parentbbe07989d7225aaff9613b71dbd7f00e8d738b22 (diff)
downloadllvm-8fb8ad66c95a51b82e5c2876ed925b5512ce6b83.zip
llvm-8fb8ad66c95a51b82e5c2876ed925b5512ce6b83.tar.gz
llvm-8fb8ad66c95a51b82e5c2876ed925b5512ce6b83.tar.bz2
[BOLT] Delta-encode function start addresses in BAT (#76902)
Further reduce the size of the BAT section: large binary to 12716312 bytes (0.33x original); medium binary to 1649472 bytes (0.28x original); small binary to 428 bytes (0.30x original). Test Plan: Updated bolt/test/X86/bolt-address-translation.test
Diffstat (limited to 'bolt')
-rw-r--r--bolt/docs/BAT.md4
-rw-r--r--bolt/lib/Profile/BoltAddressTranslation.cpp8
-rw-r--r--bolt/test/X86/bolt-address-translation.test2
3 files changed, 10 insertions, 4 deletions
diff --git a/bolt/docs/BAT.md b/bolt/docs/BAT.md
index b44a51b..8a3b2fc 100644
--- a/bolt/docs/BAT.md
+++ b/bolt/docs/BAT.md
@@ -64,9 +64,11 @@ Header:
| `NumFuncs` | ULEB128 | Number of functions in the functions table |
The header is followed by the Functions table with `NumFuncs` entries.
+Output binary addresses are delta-encoded, meaning that only the difference from
+the previous output address is stored. Addresses implicitly start at zero.
| Entry | Encoding | Description |
| ------ | ------| ----------- |
-| `Address` | ULEB128 | Function address in the output binary |
+| `Address` | Delta, ULEB128 | Function address in the output binary |
| `NumEntries` | ULEB128 | Number of address translation entries for a function |
Function header is followed by `NumEntries` pairs of offsets for current
diff --git a/bolt/lib/Profile/BoltAddressTranslation.cpp b/bolt/lib/Profile/BoltAddressTranslation.cpp
index 6079b97..19b63d4 100644
--- a/bolt/lib/Profile/BoltAddressTranslation.cpp
+++ b/bolt/lib/Profile/BoltAddressTranslation.cpp
@@ -106,13 +106,15 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
const uint32_t NumFuncs = Maps.size();
encodeULEB128(NumFuncs, OS);
LLVM_DEBUG(dbgs() << "Writing " << NumFuncs << " functions for BAT.\n");
+ uint64_t PrevAddress = 0;
for (auto &MapEntry : Maps) {
const uint64_t Address = MapEntry.first;
MapTy &Map = MapEntry.second;
const uint32_t NumEntries = Map.size();
LLVM_DEBUG(dbgs() << "Writing " << NumEntries << " entries for 0x"
<< Twine::utohexstr(Address) << ".\n");
- encodeULEB128(Address, OS);
+ encodeULEB128(Address - PrevAddress, OS);
+ PrevAddress = Address;
encodeULEB128(NumEntries, OS);
uint64_t InOffset = 0, OutOffset = 0;
// Output and Input addresses are delta-encoded
@@ -160,8 +162,10 @@ std::error_code BoltAddressTranslation::parse(StringRef Buf) {
Error Err(Error::success());
const uint32_t NumFunctions = DE.getULEB128(&Offset, &Err);
LLVM_DEBUG(dbgs() << "Parsing " << NumFunctions << " functions\n");
+ uint64_t PrevAddress = 0;
for (uint32_t I = 0; I < NumFunctions; ++I) {
- const uint64_t Address = DE.getULEB128(&Offset, &Err);
+ const uint64_t Address = PrevAddress + DE.getULEB128(&Offset, &Err);
+ PrevAddress = Address;
const uint32_t NumEntries = DE.getULEB128(&Offset, &Err);
MapTy Map;
diff --git a/bolt/test/X86/bolt-address-translation.test b/bolt/test/X86/bolt-address-translation.test
index fc57668..a232f78 100644
--- a/bolt/test/X86/bolt-address-translation.test
+++ b/bolt/test/X86/bolt-address-translation.test
@@ -37,7 +37,7 @@
# CHECK: BOLT: 3 out of 7 functions were overwritten.
# CHECK: BOLT-INFO: Wrote 6 BAT maps
# CHECK: BOLT-INFO: Wrote 3 BAT cold-to-hot entries
-# CHECK: BOLT-INFO: BAT section size (bytes): 436
+# CHECK: BOLT-INFO: BAT section size (bytes): 428
#
# usqrt mappings (hot part). We match against any key (left side containing
# the bolted binary offsets) because BOLT may change where it puts instructions