From 1945c96f2b0ad2bf09b1a2e182d2d0d98a30698e Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Sun, 3 Oct 2004 01:21:47 +0000 Subject: Update, * nscd/connections.c: Rewrite handling of incoming connections. All are handled by one thread which then hands of the descriptors for the real work to the worker threads. * nscd/Makefile: Link nscd with librt. * nscd/selinux.c: Pretty printing. * nscd/dbg_log.c (dbg_log): Don't add unnecessary newline to output. Let syslog do the formatting if debug_level == 0. --- nscd/Makefile | 6 +- nscd/connections.c | 379 ++++++++++++++++++++++++++++++++++++++++++----------- nscd/dbg_log.c | 9 +- nscd/selinux.c | 4 +- 4 files changed, 314 insertions(+), 84 deletions(-) (limited to 'nscd') diff --git a/nscd/Makefile b/nscd/Makefile index a40a455..95fd1ea 100644 --- a/nscd/Makefile +++ b/nscd/Makefile @@ -112,9 +112,11 @@ $(objpfx)nscd: $(nscd-modules:%=$(objpfx)%.o) $(objpfx)nscd_nischeck: $(objpfx)nscd_nischeck.o ifeq ($(build-shared),yes) -$(objpfx)nscd: $(shared-thread-library) $(common-objpfx)nis/libnsl.so +$(objpfx)nscd: $(common-objpfx)rt/librt.so $(shared-thread-library) \ + $(common-objpfx)nis/libnsl.so $(objpfx)nscd_nischeck: $(common-objpfx)nis/libnsl.so else -$(objpfx)nscd: $(static-thread-library) $(common-objpfx)nis/libnsl.a +$(objpfx)nscd: $(common-objpfx)rt/librt.a $(static-thread-library) \ + $(common-objpfx)nis/libnsl.a $(objpfx)nscd_nischeck: $(common-objpfx)nis/libnsl.a endif diff --git a/nscd/connections.c b/nscd/connections.c index 5de7c3b..b658fddb 100644 --- a/nscd/connections.c +++ b/nscd/connections.c @@ -785,91 +785,120 @@ cannot handle old request version %d; current version is %d"), } +/* List of file descriptors. */ +struct fdlist +{ + int fd; + struct fdlist *next; +}; +/* Memory allocated for the list. */ +static struct fdlist *fdlist; +/* List of currently ready-to-read file descriptors. */ +static struct fdlist *readylist; + +/* Conditional variable and mutex to signal availability of entries in + READYLIST. The condvar is initialized dynamically since we might + use a different clock depending on availability. */ +static pthread_cond_t readylist_cond; +static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER; + +/* The clock to use with the condvar. */ +static clockid_t timeout_clock = CLOCK_REALTIME; + +/* Number of threads ready to handle the READYLIST. */ +static unsigned long int nready; + + /* This is the main loop. It is replicated in different threads but the `poll' call makes sure only one thread handles an incoming connection. */ static void * __attribute__ ((__noreturn__)) nscd_run (void *p) { - long int my_number = (long int) p; - struct pollfd conn; - int run_prune = my_number < lastdb && dbs[my_number].enabled; - time_t next_prune = run_prune ? time (NULL) + CACHE_PRUNE_INTERVAL : 0; - static unsigned long int nready; + const long int my_number = (long int) p; + const int run_prune = my_number < lastdb && dbs[my_number].enabled; + struct timespec prune_ts; + int to = 0; + char buf[256]; if (run_prune) - setup_thread (&dbs[my_number]); + { + setup_thread (&dbs[my_number]); - conn.fd = sock; - conn.events = POLLRDNORM; + /* We are running. */ + dbs[my_number].head->timestamp = time (NULL); - while (1) - { - int nr; - time_t now = 0; + if (clock_gettime (timeout_clock, &prune_ts) == -1) + /* Should never happen. */ + abort (); - /* One more thread available. */ - atomic_increment (&nready); + /* Compute timeout time. */ + prune_ts.tv_sec += CACHE_PRUNE_INTERVAL; + } + + /* Initial locking. */ + pthread_mutex_lock (&readylist_lock); + + /* One more thread available. */ + ++nready; - no_conn: - do + while (1) + { + while (readylist == NULL) { - int timeout = -1; if (run_prune) { - /* NB: we do not flush the timestamp update using msync since - this value doesnot matter on disk. */ - dbs[my_number].head->timestamp = now = time (NULL); - timeout = now < next_prune ? 1000 * (next_prune - now) : 0; + /* Wait, but not forever. */ + to = pthread_cond_timedwait (&readylist_cond, &readylist_lock, + &prune_ts); + + /* If we were woken and there is no work to be done, + just start pruning. */ + if (readylist == NULL && to == ETIMEDOUT) + { + pthread_mutex_unlock (&readylist_lock); + goto only_prune; + } } + else + /* No need to timeout. */ + pthread_cond_wait (&readylist_cond, &readylist_lock); + } - nr = poll (&conn, 1, timeout); + struct fdlist *it = readylist->next; + if (readylist->next == readylist) + /* Just one entry on the list. */ + readylist = NULL; + else + readylist->next = it->next; - if (nr == 0) - { - /* The `poll' call timed out. It's time to clean up the - cache. */ - atomic_decrement (&nready); - assert (my_number < lastdb); - prune_cache (&dbs[my_number], time(NULL)); - now = time (NULL); - next_prune = now + CACHE_PRUNE_INTERVAL; - - goto try_get; - } - } - while ((conn.revents & POLLRDNORM) == 0); + /* Extract the information and mark the record ready to be used + again. */ + int fd = it->fd; + it->next = NULL; - got_data:; - /* We have a new incoming connection. Accept the connection. */ - int fd = TEMP_FAILURE_RETRY (accept (conn.fd, NULL, NULL)); - request_header req; - char buf[256]; - uid_t uid = -1; -#ifdef SO_PEERCRED - pid_t pid = 0; -#endif + /* One more thread available. */ + --nready; - if (__builtin_expect (fd, 0) < 0) - { - if (errno != EAGAIN && errno != EWOULDBLOCK) - dbg_log (_("while accepting connection: %s"), - strerror_r (errno, buf, sizeof (buf))); - goto no_conn; - } + /* We are done with the list. */ + pthread_mutex_unlock (&readylist_lock); - /* This thread is busy. */ - atomic_decrement (&nready); + /* We do not want to block on a short read or so. */ + int fl = fcntl (fd, F_GETFL); + if (fl == -1 || fcntl (fd, F_SETFL, fl | O_NONBLOCK) == -1) + goto close_and_out; /* Now read the request. */ + request_header req; if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req))) != sizeof (req), 0)) { + /* We failed to read data. Note that this also might mean we + failed because we would have blocked. */ if (debug_level > 0) dbg_log (_("short read while reading request: %s"), strerror_r (errno, buf, sizeof (buf))); - close (fd); - continue; + goto close_and_out; } /* Check whether this is a valid request type. */ @@ -878,7 +907,10 @@ nscd_run (void *p) /* Some systems have no SO_PEERCRED implementation. They don't care about security so we don't as well. */ + uid_t uid = -1; #ifdef SO_PEERCRED + pid_t pid = 0; + if (secure_in_use) { struct ucred caller; @@ -909,8 +941,9 @@ nscd_run (void *p) /* It should not be possible to crash the nscd with a silly request (i.e., a terribly large key). We limit the size to 1kb. */ +#define MAXKEYLEN 1024 if (__builtin_expect (req.key_len, 1) < 0 - || __builtin_expect (req.key_len, 1) > 1024) + || __builtin_expect (req.key_len, 1) > MAXKEYLEN) { if (debug_level > 0) dbg_log (_("key length in request too long: %d"), req.key_len); @@ -918,17 +951,17 @@ nscd_run (void *p) else { /* Get the key. */ - char keybuf[req.key_len]; + char keybuf[MAXKEYLEN]; if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf, req.key_len)) != req.key_len, 0)) { + /* Again, this can also mean we would have blocked. */ if (debug_level > 0) dbg_log (_("short read while reading request key: %s"), strerror_r (errno, buf, sizeof (buf))); - close (fd); - continue; + goto close_and_out; } if (__builtin_expect (debug_level, 0) > 0) @@ -952,18 +985,193 @@ handle_request: request received (Version = %d)"), req.version); /* We are done. */ close (fd); - /* Just determine whether any data is present. We do this to - measure whether clients are queued up. */ - try_get: - nr = poll (&conn, 1, 0); - if (nr != 0) + /* Check whether we should be pruning the cache. */ + assert (run_prune || to == 0); + if (to == ETIMEDOUT) + { + only_prune: + /* The pthread_cond_timedwait() call timed out. It is time + to clean up the cache. */ + assert (my_number < lastdb); + prune_cache (&dbs[my_number], + prune_ts.tv_sec + (prune_ts.tv_nsec >= 500000000)); + + if (clock_gettime (timeout_clock, &prune_ts) == -1) + /* Should never happen. */ + abort (); + + /* Compute next timeout time. */ + prune_ts.tv_sec += CACHE_PRUNE_INTERVAL; + + /* In case the list is emtpy we do not want to run the prune + code right away again. */ + to = 0; + } + + /* Re-locking. */ + pthread_mutex_lock (&readylist_lock); + + /* One more thread available. */ + ++nready; + } +} + + +static void +__attribute__ ((__noreturn__)) +main_loop (void) +{ + /* Determine how much room for descriptors we should initially + allocate. This might need to change later if we cap the number + with MAXCONN. */ + const long int nfds = sysconf (_SC_OPEN_MAX); + unsigned int nconns; +#define MINCONN 32 +#define MAXCONN 16384 + if (nfds == -1 || nfds > MAXCONN) + nconns = MAXCONN; + else if (nfds < MINCONN) + nconns = MINCONN; + else + nconns = nfds; + + struct pollfd *conns = (struct pollfd *) xmalloc (nconns + * sizeof (conns[0])); + + /* We need two mirroring arrays filled with the times the connection + was accepted and a place to pass descriptors on to the worker + threads. We cannot put this in the same data structure as the + CONNS data since CONNS is passed as an array to poll(). */ + time_t *starttime = (time_t *) xmalloc (nconns * sizeof (starttime[0])); + fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0])); + + conns[0].fd = sock; + conns[0].events = POLLRDNORM; + size_t nused = 1; + size_t firstfree = 1; + + while (1) + { + /* Wait for any event. We wait at most a couple of seconds so + that we can check whether we should close any of the accepted + connections since we have not received a request. */ +#define MAX_ACCEPT_TIMEOUT 30 +#define MIN_ACCEPT_TIMEOUT 5 +#define MAIN_THREAD_TIMEOUT \ + (MAX_ACCEPT_TIMEOUT * 1000 \ + - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns)) + + int n = poll (conns, nused, MAIN_THREAD_TIMEOUT); + + time_t now = time (NULL); + + /* If there is a descriptor ready for reading or there is a new + connection, process this now. */ + if (n > 0) { - if (nready == 0) - ++client_queued; + if (conns[0].revents != 0) + { + /* We have a new incoming connection. Accept the connection. */ + int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL)); + + if (fd >= 0) + { + /* We have a new file descriptor. Keep it around + and wait until data becomes available. */ + if (firstfree == nconns) + { + // XXX Maybe extend array. For now, reject + close (fd); + goto reject_out; + } + + conns[firstfree].fd = fd; + conns[firstfree].events = POLLRDNORM; + starttime[firstfree] = now; + if (firstfree >= nused) + nused = firstfree + 1; + + do + ++firstfree; + while (firstfree < nused && conns[firstfree].fd != -1); + } + + reject_out: + --n; + } + + for (size_t cnt = 1; cnt < nused && n > 0; ++cnt) + if (conns[cnt].revents != 0) + { + pthread_mutex_lock (&readylist_lock); + + /* Find an empty entry in FDLIST. */ + size_t inner; + for (inner = 0; inner < nconns; ++inner) + if (fdlist[inner].next == NULL) + break; + assert (inner < nconns); + + fdlist[inner].fd = conns[cnt].fd; + + if (readylist == NULL) + readylist = fdlist[inner].next = &fdlist[inner]; + else + { + fdlist[inner].next = readylist->next; + readylist = readylist->next = &fdlist[inner]; + } + + bool do_signal = true; + if (__builtin_expect (nready == 0, 0)) + { + ++client_queued; + do_signal = false; + } - atomic_increment (&nready); + pthread_mutex_unlock (&readylist_lock); - goto got_data; + /* Tell one of the worker threads there is work to do. */ + if (do_signal) + pthread_cond_signal (&readylist_cond); + + /* Clean up the CONNS array. */ + conns[cnt].fd = -1; + if (cnt < firstfree) + firstfree = cnt; + if (cnt == nused - 1) + do + --nused; + while (conns[nused - 1].fd == -1); + + --n; + } + } + + /* Now find entries which have timed out. */ + assert (nused > 0); + for (size_t cnt = nused - 1; cnt > 0; --cnt) + { + /* We make the timeout length depend on the number of file + descriptors currently used. */ +#define ACCEPT_TIMEOUT \ + (MAX_ACCEPT_TIMEOUT \ + - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns) + + time_t laststart = now - ACCEPT_TIMEOUT; + if (conns[cnt].fd != -1 && starttime[cnt] < laststart) + { + /* Remove the entry, it timed out. */ + (void) close (conns[cnt].fd); + conns[cnt].fd = -1; + + if (cnt < firstfree) + firstfree = cnt; + if (cnt == nused - 1) + do + --nused; + while (conns[nused - 1].fd == -1); + } } } } @@ -973,10 +1181,26 @@ handle_request: request received (Version = %d)"), req.version); void start_threads (void) { - long int i; - pthread_attr_t attr; - pthread_t th; + /* Initialize the conditional variable we will use. The only + non-standard attribute we might use is the clock selection. */ + pthread_condattr_t condattr; + pthread_condattr_init (&condattr); + +#if _POSIX_CLOCK_SELECTION >= 0 && _POSIX_MONOTONIC_CLOCK >= 0 + /* Determine whether the monotonous clock is available. */ + struct timespec dummy; + if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0 + && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0) + timeout_clock = CLOCK_MONOTONIC; +#endif + pthread_cond_init (&readylist_cond, &condattr); + pthread_condattr_destroy (&condattr); + + + /* Create the attribute for the threads. They are all created + detached. */ + pthread_attr_t attr; pthread_attr_init (&attr); pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED); @@ -984,12 +1208,17 @@ start_threads (void) if (debug_level == 0) nthreads = MAX (nthreads, lastdb); - for (i = 1; i < nthreads; ++i) - pthread_create (&th, &attr, nscd_run, (void *) i); + for (long int i = 0; i < nthreads; ++i) + { + pthread_t th; + pthread_create (&th, &attr, nscd_run, (void *) i); + } pthread_attr_destroy (&attr); - nscd_run ((void *) 0); + /* In the main thread we execute the loop which handles incoming + connections. */ + main_loop (); } diff --git a/nscd/dbg_log.c b/nscd/dbg_log.c index bcd9426..afa06dc 100644 --- a/nscd/dbg_log.c +++ b/nscd/dbg_log.c @@ -61,7 +61,8 @@ dbg_log (const char *fmt,...) if (debug_level > 0) { - snprintf (msg, sizeof (msg), "%d: %s\n", getpid (), msg2); + snprintf (msg, sizeof (msg), "%d: %s%s", getpid (), msg2, + msg2[strlen (msg2) - 1] == '\n' ? "" : "\n"); if (dbgout) { fputs (msg, dbgout); @@ -71,9 +72,7 @@ dbg_log (const char *fmt,...) fputs (msg, stderr); } else - { - snprintf (msg, sizeof (msg), "%d: %s", getpid (), msg2); - syslog (LOG_NOTICE, "%s", msg); - } + syslog (LOG_NOTICE, "%d %s", getpid (), msg2); + va_end (ap); } diff --git a/nscd/selinux.c b/nscd/selinux.c index 77651e0..f57f092 100644 --- a/nscd/selinux.c +++ b/nscd/selinux.c @@ -207,8 +207,8 @@ nscd_request_avc_has_perm (int fd, request_type req) dbg_log (_("Error getting context of nscd")); goto out; } - if (avc_context_to_sid (scon, &ssid) < 0 || - avc_context_to_sid (tcon, &tsid) < 0) + if (avc_context_to_sid (scon, &ssid) < 0 + || avc_context_to_sid (tcon, &tsid) < 0) { dbg_log (_("Error getting sid from context")); goto out; -- cgit v1.1