aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorShoaib Meenai <smeenai@fb.com>2024-08-09 13:31:43 -0700
committerShoaib Meenai <smeenai@fb.com>2024-08-09 13:31:43 -0700
commit90c6c7cc7cbab182e0765dbfad028de13c472525 (patch)
tree1c35941ce320ca167f96cc92728bfee7344a5762
parent5297b750e54dafe16cc13f24b8d5478214e83682 (diff)
parentccaad86b4d8251cddf1f4c3a400acd4b698552bf (diff)
downloadllvm-users/smeenai/sprhmaptool-implement-simple-string-deduplication-1.zip
llvm-users/smeenai/sprhmaptool-implement-simple-string-deduplication-1.tar.gz
llvm-users/smeenai/sprhmaptool-implement-simple-string-deduplication-1.tar.bz2
Created using spr 1.3.4
-rwxr-xr-xclang/utils/hmaptool/hmaptool38
1 files changed, 28 insertions, 10 deletions
diff --git a/clang/utils/hmaptool/hmaptool b/clang/utils/hmaptool/hmaptool
index d775463..2ca769a 100755
--- a/clang/utils/hmaptool/hmaptool
+++ b/clang/utils/hmaptool/hmaptool
@@ -110,6 +110,24 @@ class HeaderMap(object):
yield (self.get_string(key_idx),
self.get_string(prefix_idx) + self.get_string(suffix_idx))
class StringTable:
    """Deduplicating builder for a header map string table.

    Strings are stored NUL-terminated, one after another.  ``add`` returns
    the offset of a string inside the table and hands back the same offset
    when the same string is added again, so each distinct string is stored
    only once.

    Offsets are expressed in bytes of the UTF-8 encoded table (the form the
    table takes once ``table.encode()`` is written out), not in characters,
    so they remain correct when keys or paths contain non-ASCII characters.
    For pure-ASCII input the two measures coincide.

    Attributes:
        table: The table contents as a ``str``, including the leading NUL.
        offsets: Mapping from each added string to its byte offset.
    """

    def __init__(self):
        # A string table offset of 0 is interpreted as an empty bucket, so
        # it's important we don't assign an actual string to that offset:
        # the table starts with a single NUL placeholder.
        self.table = "\0"
        # Size of ``table`` once UTF-8 encoded; the next string lands here.
        self._size = 1
        # For the same reason we don't want the empty string having a 0
        # offset, so it is not pre-seeded here; it gets its own entry the
        # first time it is added.
        self.offsets = {}

    def add(self, string):
        """Return the byte offset of ``string``, appending it if it is new.

        Args:
            string: The ``str`` to store.

        Returns:
            The offset (always >= 1) at which the NUL-terminated UTF-8
            encoding of ``string`` starts within the encoded table.

        Raises:
            UnicodeEncodeError: If ``string`` cannot be encoded as UTF-8.
        """
        offset = self.offsets.get(string)
        if offset is not None:
            return offset

        # Measure before mutating so a failed encode leaves the table intact.
        encoded_len = len(string.encode())

        offset = self._size
        self.table += string + "\0"
        self._size += encoded_len + 1  # +1 for the NUL terminator
        self.offsets[string] = offset
        return offset
+
###
def action_dump(name, args):
@@ -182,7 +200,7 @@ def action_write(name, args):
table = [(0, 0, 0)
for i in range(num_buckets)]
max_value_len = 0
- strtable = "\0"
+ strtable = StringTable()
for key,value in mappings.items():
if not isinstance(key, str):
key = key.decode('utf-8')
@@ -190,14 +208,14 @@ def action_write(name, args):
value = value.decode('utf-8')
max_value_len = max(max_value_len, len(value))
- key_idx = len(strtable)
- strtable += key + '\0'
- prefix = os.path.dirname(value) + '/'
- suffix = os.path.basename(value)
- prefix_idx = len(strtable)
- strtable += prefix + '\0'
- suffix_idx = len(strtable)
- strtable += suffix + '\0'
+ key_idx = strtable.add(key)
+ prefix, suffix = os.path.split(value)
+ # This guarantees that prefix + suffix == value in all cases, including when
+ # prefix is empty or contains a trailing slash or suffix is empty (hence the use
+ # of `len(value) - len(suffix)` instead of just `-len(suffix)`).
+ prefix += value[len(prefix) : len(value) - len(suffix)]
+ prefix_idx = strtable.add(prefix)
+ suffix_idx = strtable.add(suffix)
hash = hmap_hash(key)
for i in range(num_buckets):
@@ -225,7 +243,7 @@ def action_write(name, args):
f.write(struct.pack(header_fmt, *header))
for bucket in table:
f.write(struct.pack(bucket_fmt, *bucket))
- f.write(strtable.encode())
+ f.write(strtable.table.encode())
def action_tovfs(name, args):
"convert a headermap to a VFS layout"