diff options
author | Jason Molenda <jmolenda@apple.com> | 2025-09-19 19:53:26 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-09-19 19:53:26 -0700 |
commit | 3e57a0d01c90c09b1e40d8edfe48f8e5a63e2de6 (patch) | |
tree | 87d47436ce27a7523af0d5dd5aa1cbe36cda31f2 /lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp | |
parent | 43e4757d85ccb2a4a0a6a2f4ceff27e38b77c542 (diff) | |
download | llvm-3e57a0d01c90c09b1e40d8edfe48f8e5a63e2de6.zip llvm-3e57a0d01c90c09b1e40d8edfe48f8e5a63e2de6.tar.gz llvm-3e57a0d01c90c09b1e40d8edfe48f8e5a63e2de6.tar.bz2 |
[lldb][MachO] Local structs for larger VA offsets (#159849)
The Mach-O file format has several load commands which specify the
location of data in the file in UInt32 offsets. lldb uses these same
structures to track the offsets of the binary in virtual address space
when it is running. Normally a binary is loaded in memory contiguously,
so this is fine, but on Darwin systems there is a "system shared cache"
where all system libraries are combined into one region of memory and
pre-linked. The shared cache has the TEXT segments for every binary
loaded contiguously, then the DATA segments, and finally a shared common
LINKEDIT segment for all binaries. The virtual address offset from the
TEXT segment for a library to the LINKEDIT may exceed 4GB of virtual
address space depending on the structure of the shared cache, so this
use of a UInt32 offset will not work.
There was an initial instance of this issue that I fixed last November
in https://github.com/llvm/llvm-project/pull/117832 for the LC_SYMTAB /
`symtab_command` structure. But we have the same issue now with three
additional structures: `linkedit_data_command`, `dyld_info_command`,
and `dysymtab_command`.
For all of these we can see the pattern of `dyld_info.export_off +=
linkedit_slide` applied to the offset fields in ObjectFileMachO.
This defines local structures that mirror the Mach-O structures, except
that it uses UInt64 offset fields so we can reuse the same field for a
large virtual address offset at runtime. I defined constructors from the
genuine structures, as well as operator= methods, so the structures can
be read from the Mach-O binary into the standard object, then copied
into our local expanded versions of them. The genuine structures are
part of the Mach-O ABI and cannot change their layout.
The alternative is to create local variables alongside these Mach-O load
command objects for the offsets that we care about, adjust those by the
correct VA offsets, and only use those local variables instead of the
fields in the objects. I took the approach of the local enhanced
structure in November and I think it is the cleaner approach.
rdar://160384968
Diffstat (limited to 'lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp')
-rw-r--r-- | lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp | 76 |
1 files changed, 33 insertions, 43 deletions
diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp index 924e340..fada1fd 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -2156,10 +2156,10 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) { LLDB_LOG(log, "Parsing symbol table for {0}", file_name); Progress progress("Parsing symbol table", file_name); - llvm::MachO::linkedit_data_command function_starts_load_command = {0, 0, 0, 0}; - llvm::MachO::linkedit_data_command exports_trie_load_command = {0, 0, 0, 0}; - llvm::MachO::dyld_info_command dyld_info = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - llvm::MachO::dysymtab_command dysymtab = m_dysymtab; + LinkeditDataCommandLargeOffsets function_starts_load_command; + LinkeditDataCommandLargeOffsets exports_trie_load_command; + DyldInfoCommandLargeOffsets dyld_info; + DysymtabCommandLargeOffsets dysymtab(m_dysymtab); SymtabCommandLargeOffsets symtab_load_command; // The data element of type bool indicates that this entry is thumb // code. 
@@ -2196,32 +2196,24 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) { break; // Watch for the symbol table load command switch (lc.cmd) { - case LC_SYMTAB: - // struct symtab_command { - // uint32_t cmd; /* LC_SYMTAB */ - // uint32_t cmdsize; /* sizeof(struct symtab_command) */ - // uint32_t symoff; /* symbol table offset */ - // uint32_t nsyms; /* number of symbol table entries */ - // uint32_t stroff; /* string table offset */ - // uint32_t strsize; /* string table size in bytes */ - // }; - symtab_load_command.cmd = lc.cmd; - symtab_load_command.cmdsize = lc.cmdsize; - symtab_load_command.symoff = m_data.GetU32(&offset); - symtab_load_command.nsyms = m_data.GetU32(&offset); - symtab_load_command.stroff = m_data.GetU32(&offset); - symtab_load_command.strsize = m_data.GetU32(&offset); - break; + case LC_SYMTAB: { + llvm::MachO::symtab_command lc_obj; + if (m_data.GetU32(&offset, &lc_obj.symoff, 4)) { + lc_obj.cmd = lc.cmd; + lc_obj.cmdsize = lc.cmdsize; + symtab_load_command = lc_obj; + } + } break; case LC_DYLD_INFO: - case LC_DYLD_INFO_ONLY: - if (m_data.GetU32(&offset, &dyld_info.rebase_off, 10)) { - dyld_info.cmd = lc.cmd; - dyld_info.cmdsize = lc.cmdsize; - } else { - memset(&dyld_info, 0, sizeof(dyld_info)); + case LC_DYLD_INFO_ONLY: { + llvm::MachO::dyld_info_command lc_obj; + if (m_data.GetU32(&offset, &lc_obj.rebase_off, 10)) { + lc_obj.cmd = lc.cmd; + lc_obj.cmdsize = lc.cmdsize; + dyld_info = lc_obj; } - break; + } break; case LC_LOAD_DYLIB: case LC_LOAD_WEAK_DYLIB: @@ -2245,22 +2237,20 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) { } } break; - case LC_DYLD_EXPORTS_TRIE: - exports_trie_load_command.cmd = lc.cmd; - exports_trie_load_command.cmdsize = lc.cmdsize; - if (m_data.GetU32(&offset, &exports_trie_load_command.dataoff, 2) == - nullptr) // fill in offset and size fields - memset(&exports_trie_load_command, 0, - sizeof(exports_trie_load_command)); - break; - case LC_FUNCTION_STARTS: - function_starts_load_command.cmd = lc.cmd; - 
function_starts_load_command.cmdsize = lc.cmdsize; - if (m_data.GetU32(&offset, &function_starts_load_command.dataoff, 2) == - nullptr) // fill in data offset and size fields - memset(&function_starts_load_command, 0, - sizeof(function_starts_load_command)); - break; + case LC_DYLD_EXPORTS_TRIE: { + llvm::MachO::linkedit_data_command lc_obj; + lc_obj.cmd = lc.cmd; + lc_obj.cmdsize = lc.cmdsize; + if (m_data.GetU32(&offset, &lc_obj.dataoff, 2)) + exports_trie_load_command = lc_obj; + } break; + case LC_FUNCTION_STARTS: { + llvm::MachO::linkedit_data_command lc_obj; + lc_obj.cmd = lc.cmd; + lc_obj.cmdsize = lc.cmdsize; + if (m_data.GetU32(&offset, &lc_obj.dataoff, 2)) + function_starts_load_command = lc_obj; + } break; case LC_UUID: { const uint8_t *uuid_bytes = m_data.PeekData(offset, 16); |