From 9aae19cd9aaf10e7d99c56f9d7d820c5b792faeb Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Sat, 15 Jul 2000 19:02:47 +0000 Subject: Update. 2000-05-05 H.J. Lu * sysdeps/ia64/pt-machine.h (__compare_and_swap): Change it to have acquire semantics. (__compare_and_swap_with_release_semantics): New inline function. (HAS_COMPARE_AND_SWAP_WITH_RELEASE_SEMANTICS): New macro. 2000-01-28 Hans Boehm * manager.c: Fix the problem with signals at startup. Change the way that thread stacks are allocated on IA64. Clean up some of the guard page allocation stuff. 1999-12-19 H.J. Lu * internals.h (page_roundup): New. * attr.c (__pthread_attr_setguardsize); Use page_roundup instead of roundup. * manager.c (pthread_allocate_stack): Make sure guardaddr is page aligned with page_roundup if NEED_SEPARATE_REGISTER_STACK is define. 1999-12-17 Hans Boehm * manager.c (pthread_allocate_stack): Unmap the stack top if failed to map the stack bottom. Fix the guard page. (pthread_free): Fix the guard page. * pthread.c (pthread_initialize): Set rlimit correctly for NEED_SEPARATE_REGISTER_STACK. 1999-12-16 H.J. Lu * pthread.c (__pthread_initialize_manager): Pass __pthread_manager_thread_bos instead of __pthread_manager_thread_tos to __clone2. 1999-12-16 H.J. Lu * manager.c (pthread_allocate_stack): Correct the calculation of "new_thread_bottom". Remove MAP_GROWSDOWN from mmap for stack bottom. 1999-12-13 H.J. Lu * sysdeps/ia64/pt-machine.h (__compare_and_swap): Added a stop bit after setting ar.ccv. 1999-12-12 H.J. Lu * manager.c (pthread_allocate_stack): Make the starting address of the stack bottom page aligned. FIXME: it may need changes in other places. (pthread_handle_create): Likewise. 1999-12-11 Hans Boehm * manager.c (pthread_allocate_stack): Handle NEED_SEPARATE_REGISTER_STACK. (pthread_handle_create): Likewise. * pthread.c (__pthread_initialize_manager): Likewise. * sysdeps/ia64/pt-machine.h: Use r13 for thread pointer. 1999-12-02 H.J. Lu * sysdeps/ia64/pt-machine.h: New. 
--- linuxthreads/ChangeLog | 70 ++++++++++++++ linuxthreads/internals.h | 5 + linuxthreads/manager.c | 165 +++++++++++++++++++++++++++++---- linuxthreads/pthread.c | 29 +++++- linuxthreads/sysdeps/ia64/pt-machine.h | 106 +++++++++++++++++++++ 5 files changed, 352 insertions(+), 23 deletions(-) create mode 100644 linuxthreads/sysdeps/ia64/pt-machine.h (limited to 'linuxthreads') diff --git a/linuxthreads/ChangeLog b/linuxthreads/ChangeLog index e205a2e..10abc40 100644 --- a/linuxthreads/ChangeLog +++ b/linuxthreads/ChangeLog @@ -1,3 +1,73 @@ +2000-05-05 H.J. Lu + + * sysdeps/ia64/pt-machine.h (__compare_and_swap): Change it to + have acquire semantics. + (__compare_and_swap_with_release_semantics): New inline + function. + (HAS_COMPARE_AND_SWAP_WITH_RELEASE_SEMANTICS): New macro. + +2000-01-28 Hans Boehm + + * manager.c: Fix the problem with signals at startup. + Change the way that thread stacks are allocated on IA64. + Clean up some of the guard page allocation stuff. + +1999-12-19 H.J. Lu + + * internals.h (page_roundup): New. + * attr.c (__pthread_attr_setguardsize); Use page_roundup + instead of roundup. + * manager.c (pthread_allocate_stack): Make sure guardaddr is + page aligned with page_roundup if NEED_SEPARATE_REGISTER_STACK + is define. + +1999-12-17 Hans Boehm + + * manager.c (pthread_allocate_stack): Unmap the stack top + if failed to map the stack bottom. + Fix the guard page. + (pthread_free): Fix the guard page. + + * pthread.c (pthread_initialize): Set rlimit correctly for + NEED_SEPARATE_REGISTER_STACK. + +1999-12-16 H.J. Lu + + * pthread.c (__pthread_initialize_manager): Pass + __pthread_manager_thread_bos instead of + __pthread_manager_thread_tos to __clone2. + +1999-12-16 H.J. Lu + + * manager.c (pthread_allocate_stack): Correct the calculation + of "new_thread_bottom". Remove MAP_GROWSDOWN from mmap for + stack bottom. + +1999-12-13 H.J. Lu + + * sysdeps/ia64/pt-machine.h (__compare_and_swap): Added a stop + bit after setting ar.ccv. 
+ +1999-12-12 H.J. Lu + + * manager.c (pthread_allocate_stack): Make the starting + address of the stack bottom page aligned. FIXME: it may + need changes in other places. + (pthread_handle_create): Likewise. + +1999-12-11 Hans Boehm + + * manager.c (pthread_allocate_stack): Handle + NEED_SEPARATE_REGISTER_STACK. + (pthread_handle_create): Likewise. + * pthread.c (__pthread_initialize_manager): Likewise. + + * sysdeps/ia64/pt-machine.h: Use r13 for thread pointer. + +1999-12-02 H.J. Lu + + * sysdeps/ia64/pt-machine.h: New. + 2000-07-13 Ulrich Drepper * wrapsyscall.c: Mark non-__ protected names as weak. diff --git a/linuxthreads/internals.h b/linuxthreads/internals.h index e3fbf8c..118eecf 100644 --- a/linuxthreads/internals.h +++ b/linuxthreads/internals.h @@ -311,6 +311,11 @@ static inline int nonexisting_handle(pthread_handle h, pthread_t id) /* Fill in defaults left unspecified by pt-machine.h. */ +/* We round up a value with page size. */ +#ifndef page_roundup +#define page_roundup(v,p) ((((size_t) (v)) + (p) - 1) & ~((p) - 1)) +#endif + /* The page size we can get from the system. This should likely not be changed by the machine file but, you never know. */ #ifndef PAGE_SIZE diff --git a/linuxthreads/manager.c b/linuxthreads/manager.c index 0ca172c..76ef6cf 100644 --- a/linuxthreads/manager.c +++ b/linuxthreads/manager.c @@ -82,6 +82,13 @@ static int main_thread_exiting = 0; static pthread_t pthread_threads_counter = 0; +#ifdef NEED_SEPARATE_REGISTER_STACK +/* Signal masks for the manager. These have to be global only when clone2 + is used since it's currently borken wrt signals in the child. */ +static sigset_t manager_mask; /* Manager normal signal mask */ +static sigset_t manager_mask_all; /* All bits set. 
*/ +#endif + /* Forward declarations */ static int pthread_handle_create(pthread_t *thread, const pthread_attr_t *attr, @@ -100,7 +107,9 @@ int __pthread_manager(void *arg) { int reqfd = (int) (long int) arg; struct pollfd ufd; - sigset_t mask; +#ifndef NEED_SEPARATE_REGISTER_STACK + sigset_t manager_mask; +#endif int n; struct pthread_request request; @@ -112,12 +121,15 @@ int __pthread_manager(void *arg) __pthread_manager_thread.p_errnop = &__pthread_manager_thread.p_errno; __pthread_manager_thread.p_h_errnop = &__pthread_manager_thread.p_h_errno; /* Block all signals except __pthread_sig_cancel and SIGTRAP */ - sigfillset(&mask); - sigdelset(&mask, __pthread_sig_cancel); /* for thread termination */ - sigdelset(&mask, SIGTRAP); /* for debugging purposes */ + sigfillset(&manager_mask); + sigdelset(&manager_mask, __pthread_sig_cancel); /* for thread termination */ + sigdelset(&manager_mask, SIGTRAP); /* for debugging purposes */ if (__pthread_threads_debug && __pthread_sig_debug > 0) - sigdelset(&mask, __pthread_sig_debug); - sigprocmask(SIG_SETMASK, &mask, NULL); + sigdelset(&manager_mask, __pthread_sig_debug); + sigprocmask(SIG_SETMASK, &manager_mask, NULL); +#ifdef NEED_SEPARATE_REGISTER_STACK + sigfillset(&manager_mask_all); +#endif /* Raise our priority to match that of main thread */ __pthread_manager_adjust_prio(__pthread_main_thread->p_priority); /* Synchronize debugging of the thread manager */ @@ -294,7 +306,16 @@ static int pthread_allocate_stack(const pthread_attr_t *attr, if (attr != NULL && attr->__stackaddr_set) { - /* The user provided a stack. */ + /* The user provided a stack. For now we interpret the supplied + address as 1 + the highest addr. in the stack segment. If a + separate register stack is needed, we place it at the low end + of the segment, relying on the associated stacksize to + determine the low end of the segment. This differs from many + (but not all) other pthreads implementations. 
The intent is + that on machines with a single stack growing toward higher + addresses, stackaddr would be the lowest address in the stack + segment, so that it is consistently close to the initial sp + value. */ new_thread = (pthread_descr) ((long)(attr->__stackaddr) & -sizeof(void *)) - 1; new_thread_bottom = (char *) attr->__stackaddr - attr->__stacksize; @@ -304,11 +325,57 @@ static int pthread_allocate_stack(const pthread_attr_t *attr, } else { - stacksize = STACK_SIZE - pagesize; - if (attr != NULL) - stacksize = MIN (stacksize, roundup(attr->__stacksize, pagesize)); +#ifdef NEED_SEPARATE_REGISTER_STACK + size_t granularity = 2 * pagesize; + /* Try to make stacksize/2 a multiple of pagesize */ +#else + size_t granularity = pagesize; +#endif /* Allocate space for stack and thread descriptor at default address */ + if (attr != NULL) + { + guardsize = page_roundup (attr->__guardsize, granularity); + stacksize = STACK_SIZE - guardsize; + stacksize = MIN (stacksize, + page_roundup (attr->__stacksize, granularity)); + } + else + { + guardsize = granularity; + stacksize = STACK_SIZE - granularity; + } new_thread = default_new_thread; +#ifdef NEED_SEPARATE_REGISTER_STACK + new_thread_bottom = (char *) (new_thread + 1) - stacksize - guardsize; + /* Includes guard area, unlike the normal case. Use the bottom + end of the segment as backing store for the register stack. + Needed on IA64. In this case, we also map the entire stack at + once. According to David Mosberger, that's cheaper. It also + avoids the risk of intermittent failures due to other mappings + in the same region. The cost is that we might be able to map + slightly fewer stacks. */ + + /* First the main stack: */ + if (mmap((caddr_t)((char *)(new_thread + 1) - stacksize / 2), + stacksize / 2, PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) + == MAP_FAILED) + /* Bad luck, this segment is already mapped. 
*/ + return -1; + /* Then the register stack: */ + if (mmap((caddr_t)new_thread_bottom, stacksize/2, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) + == MAP_FAILED) + { + munmap((caddr_t)((char *)(new_thread + 1) - stacksize/2), + stacksize/2); + return -1; + } + + guardaddr = new_thread_bottom + stacksize/2; + /* We leave the guard area in the middle unmapped. */ +#else /* !NEED_SEPARATE_REGISTER_STACK */ new_thread_bottom = (char *) (new_thread + 1) - stacksize; if (mmap((caddr_t)((char *)(new_thread + 1) - INITIAL_STACK_SIZE), INITIAL_STACK_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, @@ -317,10 +384,10 @@ static int pthread_allocate_stack(const pthread_attr_t *attr, /* Bad luck, this segment is already mapped. */ return -1; /* We manage to get a stack. Now see whether we need a guard - and allocate it if necessary. Notice that the default - attributes (stack_size = STACK_SIZE - pagesize) do not need - a guard page, since the RLIMIT_STACK soft limit prevents stacks - from running into one another. */ + and allocate it if necessary. Notice that the default + attributes (stack_size = STACK_SIZE - pagesize and guardsize + = pagesize) do not need a guard page, since the RLIMIT_STACK + soft limit prevents stacks from running into one another. */ if (stacksize == STACK_SIZE - pagesize) { /* We don't need a guard page. */ @@ -330,7 +397,6 @@ static int pthread_allocate_stack(const pthread_attr_t *attr, else { /* Put a bad page at the bottom of the stack */ - guardsize = attr->__guardsize; guardaddr = (void *)new_thread_bottom - guardsize; if (mmap ((caddr_t) guardaddr, guardsize, 0, MAP_FIXED, -1, 0) == MAP_FAILED) @@ -340,6 +406,7 @@ static int pthread_allocate_stack(const pthread_attr_t *attr, guardsize = 0; } } +#endif /* !NEED_SEPARATE_REGISTER_STACK */ } /* Clear the thread data structure. 
*/ memset (new_thread, '\0', sizeof (*new_thread)); @@ -452,9 +519,30 @@ static int pthread_handle_create(pthread_t *thread, const pthread_attr_t *attr, __pthread_lock(new_thread->p_lock, NULL); /* We have to report this event. */ +#ifdef NEED_SEPARATE_REGISTER_STACK + /* Perhaps this version should be used on all platforms. But + this requires that __clone2 be uniformly supported + everywhere. + + And there is some argument for changing the __clone2 + interface to pass sp and bsp instead, making it more IA64 + specific, but allowing stacks to grow outward from each + other, to get less paging and fewer mmaps. Clone2 + currently can't take signals in the child right after + process creation. Mask them in the child. It resets the + mask once it starts up. */ + sigprocmask(SIG_SETMASK, &manager_mask_all, NULL); + pid = __clone2(pthread_start_thread_event, + (void **)new_thread_bottom, + (char *)new_thread - new_thread_bottom, + CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | + __pthread_sig_cancel, new_thread); + sigprocmask(SIG_SETMASK, &manager_mask, NULL); +#else pid = __clone(pthread_start_thread_event, (void **) new_thread, CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | __pthread_sig_cancel, new_thread); +#endif if (pid != -1) { /* Now fill in the information about the new thread in @@ -479,18 +567,38 @@ static int pthread_handle_create(pthread_t *thread, const pthread_attr_t *attr, } } if (pid == 0) - pid = __clone(pthread_start_thread, (void **) new_thread, - CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | - __pthread_sig_cancel, new_thread); + { +#ifdef NEED_SEPARATE_REGISTER_STACK + sigprocmask(SIG_SETMASK, &manager_mask_all, NULL); + pid = __clone2(pthread_start_thread, + (void **)new_thread_bottom, + (char *)new_thread - new_thread_bottom, + CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | + __pthread_sig_cancel, new_thread); + sigprocmask(SIG_SETMASK, &manager_mask, NULL); +#else + pid = __clone(pthread_start_thread, (void **) new_thread, 
+ CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | + __pthread_sig_cancel, new_thread); +#endif /* !NEED_SEPARATE_REGISTER_STACK */ + } /* Check if cloning succeeded */ if (pid == -1) { /* Free the stack if we allocated it */ if (attr == NULL || !attr->__stackaddr_set) { +#ifdef NEED_SEPARATE_REGISTER_STACK + size_t stacksize = ((char *)(new_thread->p_guardaddr) + - new_thread_bottom); + munmap((caddr_t)new_thread_bottom, stacksize); + munmap((caddr_t)new_thread_bottom + stacksize + + new_thread->p_guardsize, stacksize); +#else if (new_thread->p_guardsize != 0) munmap(new_thread->p_guardaddr, new_thread->p_guardsize); munmap((caddr_t)((char *)(new_thread+1) - INITIAL_STACK_SIZE), INITIAL_STACK_SIZE); +#endif } __pthread_handles[sseg].h_descr = NULL; __pthread_handles[sseg].h_bottom = NULL; @@ -550,10 +658,27 @@ static void pthread_free(pthread_descr th) if (th == &__pthread_initial_thread) return; if (!th->p_userstack) { + size_t guardsize = th->p_guardsize; /* Free the stack and thread descriptor area */ - if (th->p_guardsize != 0) - munmap(th->p_guardaddr, th->p_guardsize); +#ifdef NEED_SEPARATE_REGISTER_STACK + char *guardaddr = th->p_guardaddr; + /* We unmap exactly what we mapped, in case there was something + else in the same region. Guardaddr is always set, even if + guardsize is 0. This allows us to compute everything else. */ + size_t stacksize = (char *)(th+1) - guardaddr - guardsize; + /* Unmap the register stack, which is below guardaddr. */ + munmap((caddr_t)(guardaddr-stacksize), stacksize); + /* Unmap the main stack. */ + munmap((caddr_t)(guardaddr+guardsize), stacksize); +#else + /* The following assumes that we only allocate stacks of one + size. That's currently true but probably shouldn't be. This + looks like it fails for growing stacks if there was something + else mapped just below the stack? 
*/ + if (guardsize != 0) + munmap(th->p_guardaddr, guardsize); munmap((caddr_t) ((char *)(th+1) - STACK_SIZE), STACK_SIZE); +#endif } } diff --git a/linuxthreads/pthread.c b/linuxthreads/pthread.c index 2700a29..d70e3f4 100644 --- a/linuxthreads/pthread.c +++ b/linuxthreads/pthread.c @@ -362,7 +362,13 @@ static void pthread_initialize(void) /* Play with the stack size limit to make sure that no stack ever grows beyond STACK_SIZE minus one page (to act as a guard page). */ getrlimit(RLIMIT_STACK, &limit); +#ifdef NEED_SEPARATE_REGISTER_STACK + /* STACK_SIZE bytes hold both the main stack and register backing + store. The rlimit value applies to each individually. */ + max_stack = STACK_SIZE/2 - __getpagesize(); +#else max_stack = STACK_SIZE - __getpagesize(); +#endif if (limit.rlim_cur > max_stack) { limit.rlim_cur = max_stack; setrlimit(RLIMIT_STACK, &limit); @@ -444,10 +450,18 @@ int __pthread_initialize_manager(void) | __pthread_initial_thread.p_eventbuf.eventmask.event_bits[idx])) != 0) { +#ifdef NEED_SEPARATE_REGISTER_STACK + pid = __clone2(__pthread_manager_event, + (void **) __pthread_manager_thread_bos, + THREAD_MANAGER_STACK_SIZE, + CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND, + (void *)(long)manager_pipe[0]); +#else pid = __clone(__pthread_manager_event, (void **) __pthread_manager_thread_tos, CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND, (void *)(long)manager_pipe[0]); +#endif if (pid != -1) { @@ -472,9 +486,18 @@ int __pthread_initialize_manager(void) } if (pid == 0) - pid = __clone(__pthread_manager, (void **) __pthread_manager_thread_tos, - CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND, - (void *)(long)manager_pipe[0]); + { +#ifdef NEED_SEPARATE_REGISTER_STACK + pid = __clone2(__pthread_manager, (void **) __pthread_manager_thread_bos, + THREAD_MANAGER_STACK_SIZE, + CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND, + (void *)(long)manager_pipe[0]); +#else + pid = __clone(__pthread_manager, (void **) __pthread_manager_thread_tos, + 
CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND, + (void *)(long)manager_pipe[0]); +#endif + } if (pid == -1) { free(__pthread_manager_thread_bos); __libc_close(manager_pipe[0]); diff --git a/linuxthreads/sysdeps/ia64/pt-machine.h b/linuxthreads/sysdeps/ia64/pt-machine.h new file mode 100644 index 0000000..58cccc2 --- /dev/null +++ b/linuxthreads/sysdeps/ia64/pt-machine.h @@ -0,0 +1,106 @@ +/* Machine-dependent pthreads configuration and inline functions. + IA-64 version. + Copyright (C) 1999, 2000 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +#ifndef PT_EI +# define PT_EI extern inline +#endif + +/* Make sure gcc doesn't try to be clever and move things around on + us. We need to use _exactly_ the address the user gave us, not some + alias that contains the same information. */ +#define __atomic_fool_gcc(x) (*(volatile struct { int a[100]; } *)x) + +#ifndef ELF_MACHINE_NAME + +#define NEED_SEPARATE_REGISTER_STACK + +/* Get some notion of the current stack. Need not be exactly the top + of the stack, just something somewhere in the current frame. + r12 (sp) is the stack pointer. 
*/ +#define CURRENT_STACK_FRAME stack_pointer +register char *stack_pointer __asm__ ("sp"); + + +/* Register r13 (tp) is reserved by the ABI as "thread pointer". */ +struct _pthread_descr_struct; +register struct _pthread_descr_struct *__thread_self __asm__("r13"); + +/* Return the thread descriptor for the current thread. */ +#define THREAD_SELF __thread_self + +/* Initialize the thread-unique value. */ +#define INIT_THREAD_SELF(descr, nr) (__thread_self = (descr)) + + +/* Access to data in the thread descriptor is easy. */ +#define THREAD_GETMEM(descr, member) __thread_self->member +#define THREAD_GETMEM_NC(descr, member) __thread_self->member +#define THREAD_SETMEM(descr, member, value) __thread_self->member = (value) +#define THREAD_SETMEM_NC(descr, member, value) __thread_self->member = (value) + + +#define HAS_COMPARE_AND_SWAP_WITH_RELEASE_SEMANTICS + +PT_EI long int +__compare_and_swap (long int *p, long int oldval, long int newval) +{ + long int readval; + + __asm__ __volatile__ + ("mov ar.ccv=%4;;\n\t" + "cmpxchg8.acq %0=%1,%2,ar.ccv" + : "=r" (readval), "=m" (__atomic_fool_gcc (p)) + : "r"(newval), "1" (__atomic_fool_gcc (p)), "r" (oldval) + : "memory"); + return readval == oldval; +} + +PT_EI long int +__compare_and_swap_with_release_semantics (long int *p, + long int oldval, + long int newval) +{ + long int readval; + + __asm__ __volatile__ + ("mov ar.ccv=%4;;\n\t" + "cmpxchg8.rel %0=%1,%2,ar.ccv" + : "=r" (readval), "=m" (__atomic_fool_gcc (p)) + : "r"(newval), "1" (__atomic_fool_gcc (p)), "r" (oldval) + : "memory"); + return readval == oldval; +} + +#endif /* ELF_MACHINE_NAME */ + +/* Spinlock implementation; required. */ +PT_EI long int +testandset (int *spinlock) +{ + long int ret; + + __asm__ __volatile__( + "xchg4 %0=%1,%2" + : "=r"(ret), "=m"(__atomic_fool_gcc (spinlock)) + : "r"(1), "1"(__atomic_fool_gcc (spinlock)) + : "memory"); + + return ret; +} -- cgit v1.1