diff options
author | Shoaib Meenai <smeenai@fb.com> | 2024-08-09 13:31:43 -0700 |
---|---|---|
committer | Shoaib Meenai <smeenai@fb.com> | 2024-08-09 13:31:43 -0700 |
commit | 90c6c7cc7cbab182e0765dbfad028de13c472525 (patch) | |
tree | 1c35941ce320ca167f96cc92728bfee7344a5762 | |
parent | 5297b750e54dafe16cc13f24b8d5478214e83682 (diff) | |
parent | ccaad86b4d8251cddf1f4c3a400acd4b698552bf (diff) | |
download | llvm-users/smeenai/sprhmaptool-implement-simple-string-deduplication-1.zip llvm-users/smeenai/sprhmaptool-implement-simple-string-deduplication-1.tar.gz llvm-users/smeenai/sprhmaptool-implement-simple-string-deduplication-1.tar.bz2 |
[𝘀𝗽𝗿] initial version
users/smeenai/sprhmaptool-implement-simple-string-deduplication-1
Created using spr 1.3.4
-rwxr-xr-x | clang/utils/hmaptool/hmaptool | 38 |
1 files changed, 28 insertions, 10 deletions
diff --git a/clang/utils/hmaptool/hmaptool b/clang/utils/hmaptool/hmaptool
index d775463..2ca769a 100755
--- a/clang/utils/hmaptool/hmaptool
+++ b/clang/utils/hmaptool/hmaptool
@@ -110,6 +110,24 @@ class HeaderMap(object):
             yield (self.get_string(key_idx),
                    self.get_string(prefix_idx) + self.get_string(suffix_idx))
 
+class StringTable:
+    def __init__(self):
+        # A string table offset of 0 is interpreted as an empty bucket, so it's
+        # important we don't assign an actual string to that offset.
+        self.table = "\0"
+        # For the same reason we don't want the empty string having a 0 offset.
+        self.offsets = {}
+
+    def add(self, string):
+        offset = self.offsets.get(string)
+        if offset:
+            return offset
+
+        offset = len(self.table)
+        self.table += string + "\0"
+        self.offsets[string] = offset
+        return offset
+
 ###
 
 def action_dump(name, args):
@@ -182,7 +200,7 @@ def action_write(name, args):
     table = [(0, 0, 0)
              for i in range(num_buckets)]
     max_value_len = 0
-    strtable = "\0"
+    strtable = StringTable()
     for key,value in mappings.items():
         if not isinstance(key, str):
             key = key.decode('utf-8')
@@ -190,14 +208,14 @@ def action_write(name, args):
             value = value.decode('utf-8')
         max_value_len = max(max_value_len, len(value))
 
-        key_idx = len(strtable)
-        strtable += key + '\0'
-        prefix = os.path.dirname(value) + '/'
-        suffix = os.path.basename(value)
-        prefix_idx = len(strtable)
-        strtable += prefix + '\0'
-        suffix_idx = len(strtable)
-        strtable += suffix + '\0'
+        key_idx = strtable.add(key)
+        prefix, suffix = os.path.split(value)
+        # This guarantees that prefix + suffix == value in all cases, including when
+        # prefix is empty or contains a trailing slash or suffix is empty (hence the use
+        # of `len(value) - len(suffix)` instead of just `-len(suffix)`.
+        prefix += value[len(prefix) : len(value) - len(suffix)]
+        prefix_idx = strtable.add(prefix)
+        suffix_idx = strtable.add(suffix)
 
         hash = hmap_hash(key)
         for i in range(num_buckets):
@@ -225,7 +243,7 @@ def action_write(name, args):
         f.write(struct.pack(header_fmt, *header))
         for bucket in table:
             f.write(struct.pack(bucket_fmt, *bucket))
-        f.write(strtable.encode())
+        f.write(strtable.table.encode())
 
 def action_tovfs(name, args):
     "convert a headermap to a VFS layout"