aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdrian Vogelsgesang <avogelsgesang@tableau.com>2022-07-27 13:55:58 -0700
committerAdrian Vogelsgesang <avogelsgesang@salesforce.com>2022-08-09 16:19:13 -0700
commitdf9a23e2feda18f0308b6d4dbd591ebe6b605aa4 (patch)
tree9b23b6f5c3ae4e74d8b670dadf258b90758b1b3c
parent3f8ae7efa866e581a16e9ccc8e29744722f13fff (diff)
downloadllvm-df9a23e2feda18f0308b6d4dbd591ebe6b605aa4.zip
llvm-df9a23e2feda18f0308b6d4dbd591ebe6b605aa4.tar.gz
llvm-df9a23e2feda18f0308b6d4dbd591ebe6b605aa4.tar.bz2
[libunwind] Use `_dl_find_object` if available
As shown in P2544R0 [1] and the accompanying benchmark [2], the current unwinding logic does not scale for multi-threaded programs. This is because `dl_iterate_phdr` takes a global lock. glibc 2.35 added `_dl_find_object` which directly returns the unwind info for a given target address. `_dl_find_object` is fully lock-free and hence allows parallel exception unwinding on multiple threads. With this commit, libunwind now takes advantage of `_dl_find_object`. Thereby, this commit improves libunwind's performance on benchmark [2] for unwinding exception on 20 threads from 1103ms to 78ms. (measured on Intel Xeon Silver 4114 with 20 physical cores) [1] https://isocpp.org/files/papers/P2544R0.html [2] https://github.com/neumannt/exceptionperformance Detailed performance numbers from the benchmark: Before: > Testing unwinding performance: sqrt computation with occasional errors > > testing baseline using 1 2 4 8 16 20 threads > failure rate 0%: 34 35 34 35 35 36 > testing exceptions using 1 2 4 8 16 20 threads > failure rate 0%: 16 32 33 34 35 36 > failure rate 0.1%: 16 32 34 36 35 36 > failure rate 1%: 20 40 40 43 90 113 > failure rate 10%: 59 92 140 304 880 1103 > [...] > > Testing invocation overhead: recursive fib with occasional errors > > testing exceptions using 1 2 4 8 16 20 threads > failure rate 0%: 19 32 37 38 39 36 > failure rate 0.1%: 22 32 40 40 39 34 > failure rate 1%: 20 28 38 39 48 40 > failure rate 10%: 25 39 44 50 92 113 After: > Testing unwinding performance: sqrt computation with occasional errors > > testing baseline using 1 2 4 8 16 20 threads > failure rate 0%: 19 30 35 38 39 35 > testing baseline using 1 2 4 8 16 20 threads > failure rate 0%: 32 35 33 34 34 36 > testing exceptions using 1 2 4 8 16 20 threads > failure rate 0%: 16 35 33 37 35 35 > failure rate 0.1%: 16 32 36 33 34 37 > failure rate 1%: 21 37 39 40 40 41 > failure rate 10%: 72 75 76 80 80 78 > [...] 
> > Testing invocation overhead: recursive fib with occasional errors > > testing baseline using 1 2 4 8 16 20 threads > failure rate 0%: 18 35 37 34 38 37 > testing exceptions using 1 2 4 8 16 20 threads > failure rate 0%: 19 33 40 40 41 39 > failure rate 0.1%: 21 33 39 38 39 38 > failure rate 1%: 20 36 39 40 41 40 > failure rate 10%: 25 45 41 42 44 43 Differential Revision: https://reviews.llvm.org/D130668
-rw-r--r--libunwind/src/AddressSpace.hpp55
1 files changed, 55 insertions, 0 deletions
diff --git a/libunwind/src/AddressSpace.hpp b/libunwind/src/AddressSpace.hpp
index 36c9f5a9..f1ba94e 100644
--- a/libunwind/src/AddressSpace.hpp
+++ b/libunwind/src/AddressSpace.hpp
@@ -601,6 +601,61 @@ inline bool LocalAddressSpace::findUnwindSections(pint_t targetAddr,
if (info.arm_section && info.arm_section_length)
return true;
#elif defined(_LIBUNWIND_USE_DL_ITERATE_PHDR)
+ // Use DLFO_STRUCT_HAS_EH_DBASE to determine the existence of
+ // `_dl_find_object`. Use _LIBUNWIND_SUPPORT_DWARF_INDEX, because libunwind
+ // support for _dl_find_object on other unwind formats is not implemented,
+ // yet.
+#if defined(DLFO_STRUCT_HAS_EH_DBASE) & defined(_LIBUNWIND_SUPPORT_DWARF_INDEX)
+ // We expect to run on a platform which does not use a base address for
+ // exception information.
+#if DLFO_STRUCT_HAS_EH_DBASE
+#error dlfo_eh_dbase is not supported for DWARF-based unwinding
+#endif
+ // We expect `_dl_find_object` to return PT_GNU_EH_FRAME.
+#if DLFO_EH_SEGMENT_TYPE != PT_GNU_EH_FRAME
+#error _dl_find_object retrieves an unexpected section type
+#endif
+ // We look up `_dl_find_object` dynamically at runtime to ensure backwards
+ // compatibility with earlier versions of glibc not yet providing it. On older
+ // systems, we gracefully fall back to `dl_iterate_phdr`. Cache the pointer so
+ // we only look it up once. Use a manual flag to avoid __cxa_guard_acquire.
+ static decltype(_dl_find_object) *dlFindObject;
+ static bool dlFindObjectChecked = false;
+ if (!dlFindObjectChecked) {
+ dlFindObject = reinterpret_cast<decltype(_dl_find_object) *>(
+ dlsym(RTLD_DEFAULT, "_dl_find_object"));
+ dlFindObjectChecked = true;
+ }
+ // Try to find the unwind info using `_dl_find_object`
+ dl_find_object findResult;
+ if (dlFindObject && dlFindObject((void *)targetAddr, &findResult) == 0) {
+ if (findResult.dlfo_eh_frame == nullptr) {
+ // Found an entry for `targetAddr`, but there is no unwind info.
+ return false;
+ }
+ info.dso_base = reinterpret_cast<uintptr_t>(findResult.dlfo_map_start);
+ info.text_segment_length = static_cast<size_t>(
+ (char *)findResult.dlfo_map_end - (char *)findResult.dlfo_map_start);
+
+ // Record the start of PT_GNU_EH_FRAME.
+ info.dwarf_index_section =
+ reinterpret_cast<uintptr_t>(findResult.dlfo_eh_frame);
+ // `_dl_find_object` does not give us the size of PT_GNU_EH_FRAME.
+ // Setting length to `SIZE_MAX` effectively disables all range checks.
+ info.dwarf_index_section_length = SIZE_MAX;
+ EHHeaderParser<LocalAddressSpace>::EHHeaderInfo hdrInfo;
+ if (!EHHeaderParser<LocalAddressSpace>::decodeEHHdr(
+ *this, info.dwarf_index_section, info.dwarf_index_section_length,
+ hdrInfo)) {
+ return false;
+ }
+ // Record the start of the FDE and use SIZE_MAX to indicate that we do
+ // not know the end address.
+ info.dwarf_section = hdrInfo.eh_frame_ptr;
+ info.dwarf_section_length = SIZE_MAX;
+ return true;
+ }
+#endif
dl_iterate_cb_data cb_data = {this, &info, targetAddr};
int found = dl_iterate_phdr(findUnwindSectionsByPhdr, &cb_data);
return static_cast<bool>(found);