/* Test that threads generate distinct streams of randomness.
   Copyright (C) 2022-2023 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <array_length.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <support/check.h>
#include <support/namespace.h>
#include <support/support.h>
#include <support/xthread.h>

/* Number of random blobs generated by each inner thread, whichever
   generator function is under test.  */
enum { count_per_thread = 2048 };

/* Number of threads computing randomness.  Each outer thread starts
   this many inner threads per round.  */
enum { inner_threads = 4 };

/* Number of threads launching other threads.  do_test recomputes this
   from the number of CPUs the process may run on.  */
static int outer_threads = 1;

/* Number of launching rounds performed by the outer threads.  */
enum { outer_rounds = 10 };

/* Maximum number of bytes stored in a single blob.  */
enum { max_size = 32 };

/* Sizes generated by threads.  Must be long enough to be unique with
   high probability.  */
static const int sizes[] = { 12, 15, 16, 17, 24, 31, max_size };

/* Data structure to capture randomness results.  */
struct blob
{
  /* Number of valid bytes at the start of the bytes array.  */
  unsigned int size;
  /* ID of the inner thread which produced the blob, or -1 for blobs
     collected by fork_thread.  */
  int thread_id;
  /* Position of the blob within its source: the loop iteration for
     inner threads, 0 to 2 within one subprocess for fork_thread.  */
  unsigned int index;
  /* The random data.  Only the first size bytes are meaningful.  */
  unsigned char bytes[max_size];
};

/* Argument bundle passed to fork_thread_subprocess and
   get_one_blob_thread.  */
struct subprocess_args
{
  /* Where to store the result.  fork_thread_subprocess treats this as
     the start of an array of three blobs; get_one_blob_thread fills
     only the blob pointed to.  */
  struct blob *blob;
  /* Generator used to fill a blob with random bytes.  */
  void (*func)(unsigned char *, size_t);
};

static void
generate_arc4random (unsigned char *bytes, size_t size)
{
  int i;
  for (i = 0; i < size / sizeof (uint32_t); i++)
    {
      uint32_t x = arc4random ();
      memcpy (&bytes[4 * i], &x, sizeof x);
    }
  int rem = size % sizeof (uint32_t);
  if (rem > 0)
    {
      uint32_t x = arc4random ();
      memcpy (&bytes[4 * i], &x, rem);
    }
}

/* Fill BYTES with SIZE random bytes using a single arc4random_buf
   call.  */
static void
generate_arc4random_buf (unsigned char *bytes, size_t size)
{
  arc4random_buf (bytes, size);
}

static void
generate_arc4random_uniform (unsigned char *bytes, size_t size)
{
  for (int i = 0; i < size; i++)
    bytes[i] = arc4random_uniform (256);
}

/* Instantiate the dynarray template for elements of type struct blob.
   The instantiation is named struct dynarray_blob, and the functions
   operating on it carry the dynarray_blob_ prefix.  */
#define DYNARRAY_STRUCT dynarray_blob
#define DYNARRAY_ELEMENT struct blob
#define DYNARRAY_PREFIX dynarray_blob_
#include <malloc/dynarray-skeleton.c>

/* qsort comparison function for struct blob.  Blobs are ordered by
   their size member first; blobs of equal size are ordered by the
   contents of their first size bytes.  The thread_id and index members
   do not take part in the comparison.  */
static int
compare_blob (const void *left1, const void *right1)
{
  const struct blob *a = left1;
  const struct blob *b = right1;

  if (a->size == b->size)
    return memcmp (a->bytes, b->bytes, a->size);

  /* No overflow due to limited range.  */
  return a->size - b->size;
}

/* Used to store the global result.  inner_thread and fork_thread
   append their blobs through copy_result_to_global, which holds
   global_result_lock while it modifies global_result.  */
static pthread_mutex_t global_result_lock = PTHREAD_MUTEX_INITIALIZER;
static struct dynarray_blob global_result;

/* Copy data to the global result, with locking.  */
static void
copy_result_to_global (struct dynarray_blob *result)
{
  xpthread_mutex_lock (&global_result_lock);
  size_t old_size = dynarray_blob_size (&global_result);
  TEST_VERIFY_EXIT
    (dynarray_blob_resize (&global_result,
                           old_size + dynarray_blob_size (result)));
  memcpy (dynarray_blob_begin (&global_result) + old_size,
          dynarray_blob_begin (result),
          dynarray_blob_size (result) * sizeof (struct blob));
  xpthread_mutex_unlock (&global_result_lock);
}

/* Used to assign unique thread IDs.  Accessed atomically: every inner
   thread takes the current value and increments the counter with a
   relaxed fetch-and-add.  Nothing in this file resets the counter.  */
static int next_thread_id;

/* Thread function: generate count_per_thread random blobs with the
   generator passed in CLOSURE and append them to the global result.
   Always returns NULL.  */
static void *
inner_thread (void *closure)
{
  void (*generator) (unsigned char *, size_t) = closure;
  const size_t size_count = array_length (sizes);

  int thread_id = __atomic_fetch_add (&next_thread_id, 1, __ATOMIC_RELAXED);

  /* The thread ID selects the blob size.  Most threads use one fixed
     entry of the sizes array; the remaining threads start at the first
     entry and move on to the next one after every blob.  */
  size_t slot = thread_id % (size_count + 1);
  bool rotate = slot == size_count;
  if (rotate)
    slot = 0;

  /* Collect the blobs locally, so that the global lock is not needed
     while randomness is being generated.  */
  struct dynarray_blob result;
  dynarray_blob_init (&result);

  for (unsigned int n = 0; n != count_per_thread; ++n)
    {
      struct blob *place = dynarray_blob_emplace (&result);
      TEST_VERIFY_EXIT (place != NULL);
      place->thread_id = thread_id;
      place->index = n;
      place->size = sizes[slot];
      generator (place->bytes, place->size);

      if (rotate)
        slot = slot + 1 == size_count ? 0 : slot + 1;
    }

  /* Hand the blobs over to the global result and release the local
     copy.  */
  copy_result_to_global (&result);
  dynarray_blob_free (&result);

  return NULL;
}

/* Thread function: perform outer_rounds rounds.  Each round starts
   inner_threads threads running inner_thread with the generator passed
   in CLOSURE and then waits for all of them.  Always returns NULL.  */
static void *
outer_thread (void *closure)
{
  void (*generator) (unsigned char *, size_t) = closure;
  pthread_t workers[inner_threads];
  pthread_t *const limit = workers + inner_threads;

  for (int remaining = outer_rounds; remaining > 0; --remaining)
    {
      for (pthread_t *w = workers; w != limit; ++w)
        *w = xpthread_create (NULL, inner_thread, generator);

      for (pthread_t *w = workers; w != limit; ++w)
        xpthread_join (*w);
    }

  return NULL;
}

/* Set by run_outer_threads once all outer threads have finished.
   fork_thread checks it before launching each subprocess and stops
   when it is true.  Read and written with relaxed atomic accesses.  */
static bool termination_requested;

/* Call arc4random_buf to fill one blob with 16 bytes.  */
static void *
get_one_blob_thread (void *closure)
{
  struct subprocess_args *arg = closure;
  struct blob *result = arg->blob;

  result->size = 16;
  arg->func (result->bytes, result->size);
  return NULL;
}

/* Invoked from fork_thread to actually obtain randomness data.
   CLOSURE points to a struct subprocess_args whose blob member is the
   start of an array of three blobs.  The first blob is filled by the
   calling thread, the other two by one additional thread each.  */
static void
fork_thread_subprocess (void *closure)
{
  struct subprocess_args *arg = closure;

  /* One request per blob, all using the same generator.  */
  struct subprocess_args args[3];
  for (int i = 0; i < 3; ++i)
    {
      args[i].blob = arg->blob + i;
      args[i].func = arg->func;
    }

  /* Start the helpers, contribute the first blob from this thread,
     then wait for the helpers.  */
  pthread_t helpers[2];
  for (int i = 0; i < 2; ++i)
    helpers[i] = xpthread_create (NULL, get_one_blob_thread, &args[i + 1]);

  get_one_blob_thread (&args[0]);

  for (int i = 0; i < 2; ++i)
    xpthread_join (helpers[i]);
}

/* Continuously fork subprocesses to obtain a little bit of
   randomness.  */
static void *
fork_thread (void *closure)
{
  void (*func)(unsigned char *, size_t) = closure;

  struct dynarray_blob result;
  dynarray_blob_init (&result);

  /* Three blobs from each subprocess.  */
  struct blob *shared_result
    = support_shared_allocate (3 * sizeof (*shared_result));

  while (!__atomic_load_n (&termination_requested, __ATOMIC_RELAXED))
    {
      /* Obtain the results from a subprocess.  */
      struct subprocess_args arg = { shared_result, func };
      support_isolate_in_subprocess (fork_thread_subprocess, &arg);

      for (int i = 0; i < 3; ++i)
        {
          struct blob *place = dynarray_blob_emplace (&result);
          TEST_VERIFY_EXIT (place != NULL);
          place->size = shared_result[i].size;
          place->thread_id = -1;
          place->index = i;
          memcpy (place->bytes, shared_result[i].bytes, place->size);
        }
    }

  support_shared_free (shared_result);

  copy_result_to_global (&result);
  dynarray_blob_free (&result);

  return NULL;
}

/* Launch the outer threads and wait for their termination.  A thread
   running fork_thread is started first and is asked to stop only after
   all outer threads have been joined.  FUNC is the generator passed to
   every thread.  */
static void
run_outer_threads (void (*func)(unsigned char *, size_t))
{
  /* This function runs once per generator under test, and the flag is
     still set from the previous run.  Without clearing it, fork_thread
     would leave its loop immediately, and only the first generator
     would ever be exercised across fork.  */
  __atomic_store_n (&termination_requested, false, __ATOMIC_RELAXED);

  /* Special thread that continuously calls fork.  */
  pthread_t fork_thread_id = xpthread_create (NULL, fork_thread, func);

  pthread_t threads[outer_threads];
  for (int i = 0; i < outer_threads; ++i)
    threads[i] = xpthread_create (NULL, outer_thread, func);

  for (int i = 0; i < outer_threads; ++i)
    xpthread_join (threads[i]);

  /* All regular work is done; let the forking thread finish.  */
  __atomic_store_n (&termination_requested, true, __ATOMIC_RELAXED);
  xpthread_join (fork_thread_id);
}

/* Run the complete uniqueness check for one generator.  FNAME labels
   the generator in the output; FUNC fills a buffer with random bytes.
   A result count below the expected minimum and every pair of equal
   blobs are reported as test failures.  The return value is always
   0.  */
static int
do_test_func (const char *fname, void (*func)(unsigned char *, size_t))
{
  int expected_blobs
    = count_per_thread * inner_threads * outer_threads * outer_rounds;

  dynarray_blob_init (&global_result);
  printf ("info: %s: minimum of %d blob results expected\n",
          fname, expected_blobs);

  run_outer_threads (func);

  /* expected_blobs is only a lower bound because the forking thread
     contributes a non-deterministic number of additional results.  */
  printf ("info: %s: %zu blob results observed\n", fname,
          dynarray_blob_size (&global_result));
  TEST_VERIFY (dynarray_blob_size (&global_result) >= expected_blobs);

  /* After sorting, equal blobs are adjacent, so comparing each element
     with its predecessor finds all duplicates.  */
  struct blob *first = dynarray_blob_begin (&global_result);
  struct blob *end = dynarray_blob_end (&global_result);
  qsort (first, dynarray_blob_size (&global_result), sizeof (*first),
         compare_blob);
  for (struct blob *prev = first, *cur = first + 1; cur < end; prev = cur++)
    {
      if (compare_blob (prev, cur) != 0)
        continue;

      support_record_failure ();
      char *quoted = support_quote_blob (cur->bytes, cur->size);
      printf ("error: %s: duplicate blob: \"%s\" (%d bytes)\n",
              fname, quoted, (int) cur->size);
      printf ("  first source: thread %d, index %u\n",
              prev->thread_id, prev->index);
      printf ("  second source: thread %d, index %u\n",
              cur->thread_id, cur->index);
      free (quoted);
    }

  dynarray_blob_free (&global_result);

  return 0;
}

/* Test entry point: choose the number of outer threads, then run the
   uniqueness check for arc4random, arc4random_buf and
   arc4random_uniform in turn.  Always returns 0.  */
static int
do_test (void)
{
  /* Generators under test, in the order in which they are run.  */
  static const struct
  {
    const char *name;
    void (*func) (unsigned char *, size_t);
  } generators[] =
    {
      { "arc4random", generate_arc4random },
      { "arc4random_buf", generate_arc4random_buf },
      { "arc4random_uniform", generate_arc4random_uniform },
    };

  /* Do not run more threads than the maximum of schedulable CPUs.  If
     the affinity mask cannot be obtained, outer_threads keeps its
     current value.  */
  cpu_set_t cpuset;
  if (sched_getaffinity (0, sizeof cpuset, &cpuset) == 0)
    {
      unsigned int ncpus = CPU_COUNT (&cpuset);
      /* Use only half of the CPUs to not overload the system, but run
         at least one outer thread.  */
      unsigned int groups = (ncpus / 2) / inner_threads;
      outer_threads = groups != 0 ? groups : 1;
    }

  printf ("info: outer_threads=%d inner_threads=%d\n", outer_threads,
          inner_threads);

  for (size_t i = 0; i < array_length (generators); ++i)
    do_test_func (generators[i].name, generators[i].func);

  return 0;
}

#include <support/test-driver.c>