diff options
Diffstat (limited to 'stdlib')
-rw-r--r-- | stdlib/Makefile | 5 | ||||
-rw-r--r-- | stdlib/cxa_thread_atexit_impl.c | 90 | ||||
-rw-r--r-- | stdlib/tst-tls-atexit-nodelete.c | 24 | ||||
-rw-r--r-- | stdlib/tst-tls-atexit.c | 60 |
4 files changed, 146 insertions, 33 deletions
diff --git a/stdlib/Makefile b/stdlib/Makefile index 7fc5a80..402466a 100644 --- a/stdlib/Makefile +++ b/stdlib/Makefile @@ -74,7 +74,7 @@ tests := tst-strtol tst-strtod testmb testrand testsort testdiv \ tst-makecontext3 bug-getcontext bug-fmtmsg1 \ tst-secure-getenv tst-strtod-overflow tst-strtod-round \ tst-tininess tst-strtod-underflow tst-tls-atexit \ - tst-setcontext3 + tst-setcontext3 tst-tls-atexit-nodelete tests-static := tst-secure-getenv modules-names = tst-tls-atexit-lib @@ -159,6 +159,9 @@ tst-tls-atexit-lib.so-no-z-defs = yes $(objpfx)tst-tls-atexit: $(shared-thread-library) $(libdl) $(objpfx)tst-tls-atexit.out: $(objpfx)tst-tls-atexit-lib.so +$(objpfx)tst-tls-atexit-nodelete: $(shared-thread-library) $(libdl) +$(objpfx)tst-tls-atexit-nodelete.out: $(objpfx)tst-tls-atexit-lib.so + $(objpfx)tst-setcontext3.out: tst-setcontext3.sh $(objpfx)tst-setcontext3 $(SHELL) $< $(common-objpfx) '$(test-program-prefix-before-env)' \ '$(run-program-env)' '$(test-program-prefix-after-env)' \ diff --git a/stdlib/cxa_thread_atexit_impl.c b/stdlib/cxa_thread_atexit_impl.c index 70c97da..8e26380 100644 --- a/stdlib/cxa_thread_atexit_impl.c +++ b/stdlib/cxa_thread_atexit_impl.c @@ -16,6 +16,61 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ +/* CONCURRENCY NOTES: + + This documents concurrency for the non-POD TLS destructor registration, + calling and destruction. The functions __cxa_thread_atexit_impl, + _dl_close_worker and __call_tls_dtors are the three main routines that may + run concurrently and access shared data. The shared data in all possible + combinations of all three functions are the link map list, a link map for a + DSO and the link map member l_tls_dtor_count. + + __cxa_thread_atexit_impl acquires the load_lock before accessing any shared + state and hence multiple of its instances can safely execute concurrently. + + _dl_close_worker acquires the load_lock before accessing any shared state as + well and hence can concurrently execute multiple of its own instances as + well as those of __cxa_thread_atexit_impl safely. Not all accesses to + l_tls_dtor_count are protected by the load lock, so we need to synchronize + using atomics. + + __call_tls_dtors accesses the l_tls_dtor_count without taking the lock; it + decrements the value by one. It does not need the big lock because it does + not access any other shared state except for the current DSO link map and + its member l_tls_dtor_count. + + Correspondingly, _dl_close_worker loads l_tls_dtor_count and if it is zero, + unloads the DSO, thus deallocating the current link map. This is the goal + of maintaining l_tls_dtor_count - to unload the DSO and free resources if + there are no pending destructors to be called. + + We want to eliminate the inconsistent state where the DSO is unloaded in + _dl_close_worker before it is used in __call_tls_dtors. This could happen + if __call_tls_dtors uses the link map after it sets l_tls_dtor_count to 0, + since _dl_close_worker will conclude from the 0 l_tls_dtor_count value that + it is safe to unload the DSO. Hence, to ensure that this does not happen, + the following conditions must be met: + + 1. In _dl_close_worker, the l_tls_dtor_count load happens before the DSO + is unload and its link map is freed + 2. The link map dereference in __call_tls_dtors happens before the + l_tls_dtor_count dereference. + + To ensure this, the l_tls_dtor_count decrement in __call_tls_dtors should + have release semantics and the load in _dl_close_worker should have acquire + semantics. + + Concurrent executions of __call_tls_dtors should only ensure that the value + is accessed atomically; no reordering constraints need to be considered. + Likewise for the increment of l_tls_dtor_count in __cxa_thread_atexit_impl. + + There is still a possibility on concurrent execution of _dl_close_worker and + __call_tls_dtors where _dl_close_worker reads the value of l_tls_dtor_count + as 1, __call_tls_dtors decrements the value of l_tls_dtor_count but + _dl_close_worker does not unload the DSO, having read the old value. This + is not very different from a case where __call_tls_dtors is called after + _dl_close_worker on the DSO and hence is an accepted execution. */ + #include <stdlib.h> #include <ldsodefs.h> @@ -49,9 +104,11 @@ __cxa_thread_atexit_impl (dtor_func func, void *obj, void *dso_symbol) new->next = tls_dtor_list; tls_dtor_list = new; - /* See if we already encountered the DSO. */ + /* We have to acquire the big lock to prevent a racing dlclose from pulling + our DSO from underneath us while we're setting up our destructor. */ __rtld_lock_lock_recursive (GL(dl_load_lock)); + /* See if we already encountered the DSO. */ if (__glibc_unlikely (dso_symbol_cache != dso_symbol)) { ElfW(Addr) caller = (ElfW(Addr)) dso_symbol; @@ -62,16 +119,17 @@ __cxa_thread_atexit_impl (dtor_func func, void *obj, void *dso_symbol) program (we hope). */ lm_cache = l ? l : GL(dl_ns)[LM_ID_BASE]._ns_loaded; } - /* A destructor could result in a thread_local construction and the former - could have cleared the flag. */ - if (lm_cache->l_type == lt_loaded && lm_cache->l_tls_dtor_count == 0) - lm_cache->l_flags_1 |= DF_1_NODELETE; - - new->map = lm_cache; - new->map->l_tls_dtor_count++; + /* This increment may only be concurrently observed either by the decrement + in __call_tls_dtors since the other l_tls_dtor_count access in + _dl_close_worker is protected by the load lock. The execution in + __call_tls_dtors does not really depend on this value beyond the fact that + it should be atomic, so Relaxed MO should be sufficient. */ + atomic_fetch_add_relaxed (&lm_cache->l_tls_dtor_count, 1); __rtld_lock_unlock_recursive (GL(dl_load_lock)); + new->map = lm_cache; + return 0; } @@ -83,19 +141,15 @@ __call_tls_dtors (void) while (tls_dtor_list) { struct dtor_list *cur = tls_dtor_list; - tls_dtor_list = tls_dtor_list->next; + tls_dtor_list = tls_dtor_list->next; cur->func (cur->obj); - __rtld_lock_lock_recursive (GL(dl_load_lock)); - - /* Allow DSO unload if count drops to zero. */ - cur->map->l_tls_dtor_count--; - if (cur->map->l_tls_dtor_count == 0 && cur->map->l_type == lt_loaded) - cur->map->l_flags_1 &= ~DF_1_NODELETE; - - __rtld_lock_unlock_recursive (GL(dl_load_lock)); - + /* Ensure that the MAP dereference happens before + l_tls_dtor_count decrement. That way, we protect this access from a + potential DSO unload in _dl_close_worker, which happens when + l_tls_dtor_count is 0. See CONCURRENCY NOTES for more detail. */ + atomic_fetch_add_release (&cur->map->l_tls_dtor_count, -1); free (cur); } } diff --git a/stdlib/tst-tls-atexit-nodelete.c b/stdlib/tst-tls-atexit-nodelete.c new file mode 100644 index 0000000..ff290fa --- /dev/null +++ b/stdlib/tst-tls-atexit-nodelete.c @@ -0,0 +1,24 @@ +/* Verify that a RTLD_NODELETE DSO is not unloaded even if its TLS objects are + destroyed. + + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#define NO_DELETE 1 +#define H2_RTLD_FLAGS (RTLD_LAZY | RTLD_NODELETE) +#define LOADED_IS_GOOD true +#include "tst-tls-atexit.c" diff --git a/stdlib/tst-tls-atexit.c b/stdlib/tst-tls-atexit.c index cea655d..e9839d8 100644 --- a/stdlib/tst-tls-atexit.c +++ b/stdlib/tst-tls-atexit.c @@ -16,12 +16,20 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -/* This test dynamically loads a DSO and spawns a thread that subsequently - calls into the DSO to register a destructor for an object in the DSO and - then calls dlclose on the handle for the DSO. When the thread exits, the - DSO should not be unloaded or else the destructor called during thread exit - will crash. Further in the main thread, the DSO is opened and closed again, - at which point the DSO should be unloaded. */ +/* For the default case, i.e. NO_DELETE not defined, the test dynamically loads + a DSO and spawns a thread that subsequently calls into the DSO to register a + destructor for an object in the DSO and then calls dlclose on the handle for + the DSO. When the thread exits, the DSO should not be unloaded or else the + destructor called during thread exit will crash. Further in the main + thread, the DSO is opened and closed again, at which point the DSO should be + unloaded. + + When NO_DELETE is defined, the DSO is loaded twice, once with just RTLD_LAZY + flag and the second time with the RTLD_NODELETE flag set. The thread is + spawned, destructor registered and then thread exits without closing the + DSO. In the main thread, the first handle is then closed, followed by the + second handle. In the end, the DSO should remain loaded due to the + RTLD_NODELETE flag being set in the second dlopen call. */ #include <dlfcn.h> #include <pthread.h> @@ -31,6 +39,14 @@ #include <errno.h> #include <link.h> +#ifndef NO_DELETE +# define LOADED_IS_GOOD false +#endif + +#ifndef H2_RTLD_FLAGS +# define H2_RTLD_FLAGS (RTLD_LAZY) +#endif + #define DSO_NAME "$ORIGIN/tst-tls-atexit-lib.so" /* Walk through the map in the _r_debug structure to see if our lib is still @@ -43,7 +59,10 @@ is_loaded (void) for (; lm; lm = lm->l_next) if (lm->l_type == lt_loaded && lm->l_name && strcmp (basename (DSO_NAME), basename (lm->l_name)) == 0) - return true; + { + printf ("%s is still loaded\n", lm->l_name); + return true; + } return false; } @@ -63,7 +82,9 @@ reg_dtor_and_close (void *h) reg_dtor (); +#ifndef NO_DELETE dlclose (h); +#endif return NULL; } @@ -104,19 +125,30 @@ do_test (void) return 1; } +#ifndef NO_DELETE if (spawn_thread (h1) != 0) return 1; +#endif - /* Now this should unload the DSO. FIXME: This is a bug, calling dlclose - like this is actually wrong, but it works because cxa_thread_atexit_impl - has a bug which results in dlclose allowing this to work. */ - dlclose (h1); + void *h2 = dlopen (DSO_NAME, H2_RTLD_FLAGS); + if (h2 == NULL) + { + printf ("h2: Unable to load DSO: %s\n", dlerror ()); + return 1; + } - /* Check link maps to ensure that the DSO has unloaded. */ - if (is_loaded ()) +#ifdef NO_DELETE + if (spawn_thread (h1) != 0) return 1; - return 0; + dlclose (h1); +#endif + dlclose (h2); + + /* Check link maps to ensure that the DSO has unloaded. In the normal case, + the DSO should be unloaded if there are no uses. However, if one of the + dlopen calls were with RTLD_NODELETE, the DSO should remain loaded. */ + return is_loaded () == LOADED_IS_GOOD ? 0 : 1; } #define TEST_FUNCTION do_test () |