Diffstat (limited to 'libgomp/work.c')
-rw-r--r--	libgomp/work.c	244
1 file changed, 144 insertions(+), 100 deletions(-)
diff --git a/libgomp/work.c b/libgomp/work.c
index cd20c9d..b48a5e3 100644
--- a/libgomp/work.c
+++ b/libgomp/work.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2005 Free Software Foundation, Inc.
+/* Copyright (C) 2005, 2008 Free Software Foundation, Inc.
Contributed by Richard Henderson <rth@redhat.com>.
This file is part of the GNU OpenMP Library (libgomp).
@@ -29,39 +29,138 @@
of threads. */
#include "libgomp.h"
+#include <stddef.h>
#include <stdlib.h>
#include <string.h>
-/* Create a new work share structure. */
+/* Allocate a new work share structure, preferably from current team's
+ free gomp_work_share cache. */
-struct gomp_work_share *
-gomp_new_work_share (bool ordered, unsigned nthreads)
+static struct gomp_work_share *
+alloc_work_share (struct gomp_team *team)
{
struct gomp_work_share *ws;
- size_t size;
+ unsigned int i;
- size = sizeof (*ws);
- if (ordered)
- size += nthreads * sizeof (ws->ordered_team_ids[0]);
+ /* This is called in a critical section. */
+ if (team->work_share_list_alloc != NULL)
+ {
+ ws = team->work_share_list_alloc;
+ team->work_share_list_alloc = ws->next_free;
+ return ws;
+ }
- ws = gomp_malloc_cleared (size);
- gomp_mutex_init (&ws->lock);
- ws->ordered_owner = -1;
+#ifdef HAVE_SYNC_BUILTINS
+ ws = team->work_share_list_free;
+ /* We need atomic read from work_share_list_free,
+ as free_work_share can be called concurrently. */
+ __asm ("" : "+r" (ws));
+
+ if (ws && ws->next_free)
+ {
+ struct gomp_work_share *next = ws->next_free;
+ ws->next_free = NULL;
+ team->work_share_list_alloc = next->next_free;
+ return next;
+ }
+#else
+ gomp_mutex_lock (&team->work_share_list_free_lock);
+ ws = team->work_share_list_free;
+ if (ws)
+ {
+ team->work_share_list_alloc = ws->next_free;
+ team->work_share_list_free = NULL;
+ gomp_mutex_unlock (&team->work_share_list_free_lock);
+ return ws;
+ }
+ gomp_mutex_unlock (&team->work_share_list_free_lock);
+#endif
+ team->work_share_chunk *= 2;
+ ws = gomp_malloc (team->work_share_chunk * sizeof (struct gomp_work_share));
+ ws->next_alloc = team->work_shares[0].next_alloc;
+ team->work_shares[0].next_alloc = ws;
+ team->work_share_list_alloc = &ws[1];
+ for (i = 1; i < team->work_share_chunk - 1; i++)
+ ws[i].next_free = &ws[i + 1];
+ ws[i].next_free = NULL;
return ws;
}
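
alloc_work_share above first tries the team's private allocation list, then the free list refilled by free_work_share, and only as a last resort doubles team->work_share_chunk and carves a new chunk, handing out element 0 and threading elements 1..chunk-1 onto the allocation list. The following is a minimal standalone sketch of that chunked free-list growth; the struct and field names are simplified stand-ins, not libgomp's real definitions, and the locking/atomics are left out.

/* Minimal sketch of the chunked free-list growth used by
   alloc_work_share, with simplified stand-in types and no locking.  */
#include <stdio.h>
#include <stdlib.h>

struct ws
{
  struct ws *next_free;    /* threads unused entries into a free list */
  struct ws *next_alloc;   /* chains whole chunks for later release */
};

struct team
{
  struct ws *list_alloc;   /* private free list of the allocating thread */
  struct ws *first_chunk;  /* head of the chunk chain */
  unsigned chunk;          /* number of entries in the next chunk */
};

static struct ws *
alloc_ws (struct team *team)
{
  struct ws *ws;
  unsigned i;

  /* Fast path: reuse a cached entry.  */
  if (team->list_alloc != NULL)
    {
      ws = team->list_alloc;
      team->list_alloc = ws->next_free;
      return ws;
    }

  /* Slow path: double the chunk size, allocate a fresh chunk, hand out
     element 0 and thread elements 1..chunk-1 onto the free list.  */
  team->chunk *= 2;
  ws = malloc (team->chunk * sizeof (struct ws));
  ws->next_alloc = team->first_chunk;
  team->first_chunk = ws;
  team->list_alloc = &ws[1];
  for (i = 1; i < team->chunk - 1; i++)
    ws[i].next_free = &ws[i + 1];
  ws[i].next_free = NULL;
  return ws;
}

int
main (void)
{
  struct team t = { NULL, NULL, 4 };
  struct ws *a = alloc_ws (&t);   /* allocates a chunk of 8 entries */
  struct ws *b = alloc_ws (&t);   /* served from the cached free list */
  printf ("%p %p\n", (void *) a, (void *) b);
  /* Chunks stay allocated until exit; a real implementation would walk
     first_chunk->next_alloc to release them.  */
  return 0;
}
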
+/* Initialize an already allocated struct gomp_work_share.
+ This shouldn't touch the next_alloc field. */
+
+void
+gomp_init_work_share (struct gomp_work_share *ws, bool ordered,
+ unsigned nthreads)
+{
+ gomp_mutex_init (&ws->lock);
+ if (__builtin_expect (ordered, 0))
+ {
+#define INLINE_ORDERED_TEAM_IDS_CNT \
+ ((sizeof (struct gomp_work_share) \
+ - offsetof (struct gomp_work_share, inline_ordered_team_ids)) \
+ / sizeof (((struct gomp_work_share *) 0)->inline_ordered_team_ids[0]))
+
+ if (nthreads > INLINE_ORDERED_TEAM_IDS_CNT)
+ ws->ordered_team_ids
+ = gomp_malloc (nthreads * sizeof (*ws->ordered_team_ids));
+ else
+ ws->ordered_team_ids = ws->inline_ordered_team_ids;
+ memset (ws->ordered_team_ids, '\0',
+ nthreads * sizeof (*ws->ordered_team_ids));
+ ws->ordered_num_used = 0;
+ ws->ordered_owner = -1;
+ ws->ordered_cur = 0;
+ }
+ else
+ ws->ordered_team_ids = NULL;
+ gomp_ptrlock_init (&ws->next_ws, NULL);
+ ws->threads_completed = 0;
+}
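
INLINE_ORDERED_TEAM_IDS_CNT measures how many team ids fit in the tail of struct gomp_work_share, so small teams can reuse the trailing inline_ordered_team_ids storage instead of calling gomp_malloc. Below is a sketch of the same offsetof-based capacity computation against a made-up struct; 'struct demo' and its fields are illustrative only.

/* Sketch of the offsetof-based capacity computation behind
   INLINE_ORDERED_TEAM_IDS_CNT.  'struct demo' is made up for the
   example; it is not the real struct gomp_work_share.  */
#include <stddef.h>
#include <stdio.h>

struct demo
{
  int lock;
  long other_state[4];
  /* Trailing storage reused as a small inline array of team ids.  */
  unsigned inline_ids[8];
};

/* Number of ids that fit between the start of inline_ids and the end
   of the struct, tail padding included.  */
#define INLINE_IDS_CNT \
  ((sizeof (struct demo) - offsetof (struct demo, inline_ids)) \
   / sizeof (((struct demo *) 0)->inline_ids[0]))

int
main (void)
{
  unsigned nthreads = 16;
  /* Small teams fit in the inline array; larger ones fall back to a
     heap allocation, as gomp_init_work_share does above.  */
  printf ("inline capacity: %zu, heap needed for %u threads: %s\n",
          (size_t) INLINE_IDS_CNT, nthreads,
          nthreads > INLINE_IDS_CNT ? "yes" : "no");
  return 0;
}
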
-/* Free a work share structure. */
+/* Do any needed destruction of gomp_work_share fields before it
+ is put back into free gomp_work_share cache or freed. */
-static void
-free_work_share (struct gomp_work_share *ws)
+void
+gomp_fini_work_share (struct gomp_work_share *ws)
{
gomp_mutex_destroy (&ws->lock);
- free (ws);
+ if (ws->ordered_team_ids != ws->inline_ordered_team_ids)
+ free (ws->ordered_team_ids);
+ gomp_ptrlock_destroy (&ws->next_ws);
}
+/* Free a work share struct, if not orphaned, put it into current
+ team's free gomp_work_share cache. */
+
+static inline void
+free_work_share (struct gomp_team *team, struct gomp_work_share *ws)
+{
+ gomp_fini_work_share (ws);
+ if (__builtin_expect (team == NULL, 0))
+ free (ws);
+ else
+ {
+ struct gomp_work_share *next_ws;
+#ifdef HAVE_SYNC_BUILTINS
+ do
+ {
+ next_ws = team->work_share_list_free;
+ ws->next_free = next_ws;
+ }
+ while (!__sync_bool_compare_and_swap (&team->work_share_list_free,
+ next_ws, ws));
+#else
+ gomp_mutex_lock (&team->work_share_list_free_lock);
+ next_ws = team->work_share_list_free;
+ ws->next_free = next_ws;
+ team->work_share_list_free = ws;
+ gomp_mutex_unlock (&team->work_share_list_free_lock);
+#endif
+ }
+}
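
In the HAVE_SYNC_BUILTINS case, free_work_share pushes the finished work share onto team->work_share_list_free with a compare-and-swap retry loop instead of taking work_share_list_free_lock. A standalone sketch of that lock-free push, with simplified names standing in for the libgomp types:

/* Standalone sketch of the compare-and-swap push used by
   free_work_share when HAVE_SYNC_BUILTINS is defined.  'struct node'
   and 'list_free' stand in for gomp_work_share and
   team->work_share_list_free.  */
#include <stdio.h>

struct node
{
  struct node *next_free;
};

static struct node *list_free;

static void
push_free (struct node *n)
{
  struct node *head;

  /* Classic lock-free push: read the current head, link the new node
     in front of it, and retry if another thread moved the head in the
     meantime.  */
  do
    {
      head = list_free;
      n->next_free = head;
    }
  while (!__sync_bool_compare_and_swap (&list_free, head, n));
}

int
main (void)
{
  struct node a, b;
  push_free (&a);
  push_free (&b);
  printf ("head is b: %s\n", list_free == &b ? "yes" : "no");
  return 0;
}
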
/* The current thread is ready to begin the next work sharing construct.
In all cases, thr->ts.work_share is updated to point to the new
@@ -74,71 +173,34 @@ gomp_work_share_start (bool ordered)
struct gomp_thread *thr = gomp_thread ();
struct gomp_team *team = thr->ts.team;
struct gomp_work_share *ws;
- unsigned ws_index, ws_gen;
/* Work sharing constructs can be orphaned. */
if (team == NULL)
{
- ws = gomp_new_work_share (ordered, 1);
+ ws = gomp_malloc (sizeof (*ws));
+ gomp_init_work_share (ws, ordered, 1);
thr->ts.work_share = ws;
- thr->ts.static_trip = 0;
- gomp_mutex_lock (&ws->lock);
- return true;
+ return ws;
}
- gomp_mutex_lock (&team->work_share_lock);
-
- /* This thread is beginning its next generation. */
- ws_gen = ++thr->ts.work_share_generation;
-
- /* If this next generation is not newer than any other generation in
- the team, then simply reference the existing construct. */
- if (ws_gen - team->oldest_live_gen < team->num_live_gen)
+ ws = thr->ts.work_share;
+ thr->ts.last_work_share = ws;
+ ws = gomp_ptrlock_get (&ws->next_ws);
+ if (ws == NULL)
{
- ws_index = ws_gen & team->generation_mask;
- ws = team->work_shares[ws_index];
+ /* This thread encountered a new ws first. */
+ struct gomp_work_share *ws = alloc_work_share (team);
+ gomp_init_work_share (ws, ordered, team->nthreads);
thr->ts.work_share = ws;
- thr->ts.static_trip = 0;
-
- gomp_mutex_lock (&ws->lock);
- gomp_mutex_unlock (&team->work_share_lock);
-
- return false;
+ return true;
}
-
- /* Resize the work shares queue if we've run out of space. */
- if (team->num_live_gen++ == team->generation_mask)
+ else
{
- team->work_shares = gomp_realloc (team->work_shares,
- 2 * team->num_live_gen
- * sizeof (*team->work_shares));
-
- /* Unless oldest_live_gen is zero, the sequence of live elements
- wraps around the end of the array. If we do nothing, we break
- lookup of the existing elements. Fix that by unwrapping the
- data from the front to the end. */
- if (team->oldest_live_gen > 0)
- memcpy (team->work_shares + team->num_live_gen,
- team->work_shares,
- (team->oldest_live_gen & team->generation_mask)
- * sizeof (*team->work_shares));
-
- team->generation_mask = team->generation_mask * 2 + 1;
+ thr->ts.work_share = ws;
+ return false;
}
-
- ws_index = ws_gen & team->generation_mask;
- ws = gomp_new_work_share (ordered, team->nthreads);
- thr->ts.work_share = ws;
- thr->ts.static_trip = 0;
- team->work_shares[ws_index] = ws;
-
- gomp_mutex_lock (&ws->lock);
- gomp_mutex_unlock (&team->work_share_lock);
-
- return true;
}
-
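
The rewritten gomp_work_share_start chains work shares through the next_ws pointer lock: the first thread to query it gets NULL back and becomes responsible for allocating, initializing, and later publishing the next work share, while the other threads simply follow the pointer once it is set (in libgomp the publishing step happens outside work.c). gomp_ptrlock is libgomp-internal, so the sketch below models the same hand-off with a pthread mutex and condition variable; it is an illustration of the idea, not the real gomp_ptrlock implementation.

/* Sketch of the pointer-lock hand-off, modeled with a pthread mutex
   and condition variable rather than the real gomp_ptrlock.  */
#include <pthread.h>
#include <stdio.h>

struct ptrlock
{
  void *ptr;               /* the published next work share */
  int claimed;             /* some thread already got NULL back */
  pthread_mutex_t lock;
  pthread_cond_t cond;
};

/* Returns the published pointer, or NULL if the caller must allocate
   and publish it.  */
static void *
ptrlock_get (struct ptrlock *pl)
{
  void *p;

  pthread_mutex_lock (&pl->lock);
  if (!pl->claimed)
    {
      /* First arrival: this caller owns publication.  */
      pl->claimed = 1;
      pthread_mutex_unlock (&pl->lock);
      return NULL;
    }
  while (pl->ptr == NULL)
    pthread_cond_wait (&pl->cond, &pl->lock);
  p = pl->ptr;
  pthread_mutex_unlock (&pl->lock);
  return p;
}

static void
ptrlock_set (struct ptrlock *pl, void *p)
{
  pthread_mutex_lock (&pl->lock);
  pl->ptr = p;
  pthread_cond_broadcast (&pl->cond);
  pthread_mutex_unlock (&pl->lock);
}

int
main (void)
{
  struct ptrlock pl = { NULL, 0, PTHREAD_MUTEX_INITIALIZER,
                        PTHREAD_COND_INITIALIZER };
  int next_ws = 42;

  if (ptrlock_get (&pl) == NULL)   /* first arrival: must publish */
    ptrlock_set (&pl, &next_ws);
  printf ("follower sees %d\n", *(int *) ptrlock_get (&pl));
  return 0;
}
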
/* The current thread is done with its current work sharing construct.
This version does imply a barrier at the end of the work-share. */
@@ -147,36 +209,28 @@ gomp_work_share_end (void)
{
struct gomp_thread *thr = gomp_thread ();
struct gomp_team *team = thr->ts.team;
- struct gomp_work_share *ws = thr->ts.work_share;
- bool last;
-
- thr->ts.work_share = NULL;
+ gomp_barrier_state_t bstate;
/* Work sharing constructs can be orphaned. */
if (team == NULL)
{
- free_work_share (ws);
+ free_work_share (NULL, thr->ts.work_share);
+ thr->ts.work_share = NULL;
return;
}
- last = gomp_barrier_wait_start (&team->barrier);
+ bstate = gomp_barrier_wait_start (&team->barrier);
- if (last)
+ if (gomp_barrier_last_thread (bstate))
{
- unsigned ws_index;
-
- ws_index = thr->ts.work_share_generation & team->generation_mask;
- team->work_shares[ws_index] = NULL;
- team->oldest_live_gen++;
- team->num_live_gen = 0;
-
- free_work_share (ws);
+ if (__builtin_expect (thr->ts.last_work_share != NULL, 1))
+ free_work_share (team, thr->ts.last_work_share);
}
- gomp_barrier_wait_end (&team->barrier, last);
+ gomp_team_barrier_wait_end (&team->barrier, bstate);
+ thr->ts.last_work_share = NULL;
}
-
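
gomp_work_share_end splits the team barrier so that exactly one thread, the last to arrive, frees the previous work share before the others are released. The sketch below shows the same "one designated thread cleans up at the barrier" shape using the standard POSIX barrier API, whose PTHREAD_BARRIER_SERIAL_THREAD return value plays the role of gomp_barrier_last_thread; unlike libgomp's split barrier, the cleanup here happens after the barrier releases, which is safe only because no other thread touches the freed pointer afterwards.

/* Illustration of one-thread cleanup at a barrier using POSIX
   barriers; not libgomp's barrier implementation.  */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define NTHREADS 4

static pthread_barrier_t bar;
static int *previous_work_share;   /* stands in for ts.last_work_share */

static void *
work_share_end (void *arg)
{
  int rc = pthread_barrier_wait (&bar);
  (void) arg;
  if (rc == PTHREAD_BARRIER_SERIAL_THREAD)
    {
      /* Exactly one thread gets this return value and reclaims the
         now-unused previous work share.  */
      free (previous_work_share);
      previous_work_share = NULL;
      puts ("previous work share freed by one thread");
    }
  return NULL;
}

int
main (void)
{
  pthread_t t[NTHREADS];
  int i;

  previous_work_share = malloc (sizeof (int));
  pthread_barrier_init (&bar, NULL, NTHREADS);
  for (i = 0; i < NTHREADS; i++)
    pthread_create (&t[i], NULL, work_share_end, NULL);
  for (i = 0; i < NTHREADS; i++)
    pthread_join (t[i], NULL);
  pthread_barrier_destroy (&bar);
  return 0;
}
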
/* The current thread is done with its current work sharing construct.
This version does NOT imply a barrier at the end of the work-share. */
@@ -188,15 +242,17 @@ gomp_work_share_end_nowait (void)
struct gomp_work_share *ws = thr->ts.work_share;
unsigned completed;
- thr->ts.work_share = NULL;
-
/* Work sharing constructs can be orphaned. */
if (team == NULL)
{
- free_work_share (ws);
+ free_work_share (NULL, ws);
+ thr->ts.work_share = NULL;
return;
}
+ if (__builtin_expect (thr->ts.last_work_share == NULL, 0))
+ return;
+
#ifdef HAVE_SYNC_BUILTINS
completed = __sync_add_and_fetch (&ws->threads_completed, 1);
#else
@@ -206,18 +262,6 @@ gomp_work_share_end_nowait (void)
#endif
if (completed == team->nthreads)
- {
- unsigned ws_index;
-
- gomp_mutex_lock (&team->work_share_lock);
-
- ws_index = thr->ts.work_share_generation & team->generation_mask;
- team->work_shares[ws_index] = NULL;
- team->oldest_live_gen++;
- team->num_live_gen--;
-
- gomp_mutex_unlock (&team->work_share_lock);
-
- free_work_share (ws);
- }
+ free_work_share (team, thr->ts.last_work_share);
+ thr->ts.last_work_share = NULL;
}
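
The nowait variant instead counts completions: each thread atomically bumps ws->threads_completed, and whichever increment brings the count to team->nthreads knows every thread is finished with the work share and may recycle it. A standalone sketch of that pattern, with simplified stand-in types:

/* Sketch of the nowait completion count: the increment that reaches
   NTHREADS is by definition the last one, so exactly one thread takes
   the free path.  Types and names are simplified stand-ins.  */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define NTHREADS 4

struct ws
{
  unsigned threads_completed;
};

static struct ws *current_ws;

static void *
end_nowait (void *arg)
{
  struct ws *ws = current_ws;
  unsigned completed;

  (void) arg;
  /* __sync_add_and_fetch returns the updated value, so only the last
     thread observes the full count.  */
  completed = __sync_add_and_fetch (&ws->threads_completed, 1);
  if (completed == NTHREADS)
    {
      free (ws);
      puts ("last thread out freed the work share");
    }
  return NULL;
}

int
main (void)
{
  pthread_t t[NTHREADS];
  int i;

  current_ws = calloc (1, sizeof (struct ws));
  for (i = 0; i < NTHREADS; i++)
    pthread_create (&t[i], NULL, end_nowait, NULL);
  for (i = 0; i < NTHREADS; i++)
    pthread_join (t[i], NULL);
  return 0;
}
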