aboutsummaryrefslogtreecommitdiff
path: root/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-2.c
blob: 591c0f322ef4d5dc7b4e5a0bfc810aed37280030 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#include <stdlib.h>

#define N (1024 * 512)
#define COUNTERTYPE unsigned int

/* Exercise three OpenACC kernels regions over N-element arrays and verify
   the results on the host:
     a[i] = i * 2   (no parallelism clauses),
     b[i] = i * 4   (num_gangs/num_workers/vector_length given as
		     expressions that read elements of a),
     c[i] = a[i] + b[i]   (the same three clauses given as literals).
   Calls abort () if an allocation fails or any element is wrong;
   returns 0 otherwise.  */
int
main (void)
{
  unsigned int *__restrict a;
  unsigned int *__restrict b;
  unsigned int *__restrict c;

  /* The casts are kept on purpose: this file sits in a c-c++-common
     directory, and C++ does not convert void * implicitly.  */
  a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
  b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
  c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));

  /* Fail the same way a wrong result does, rather than handing a null
     pointer to the data clauses below.  */
  if (!a || !b || !c)
    abort ();

  /* Parallelism dimensions: compiler/runtime decides.  */
#pragma acc kernels copyout (a[0:N])
  {
    for (COUNTERTYPE i = 0; i < N; i++)
      a[i] = i * 2;
  }

  /* Parallelism dimensions: variable.  The clause arguments read a[3],
     a[5] and a[7], which the region above has already written.  */
#pragma acc kernels copyout (b[0:N]) \
  num_gangs (3 + a[3]) num_workers (5 + a[5]) vector_length (7 + a[7])
  /* { dg-prune-output "using .vector_length \\(32\\)., ignoring runtime setting" } */
  {
    for (COUNTERTYPE i = 0; i < N; i++)
      b[i] = i * 4;
  }

  /* Parallelism dimensions: literal.  */
#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N]) \
  num_gangs (3) num_workers (5) vector_length (7)
  /* { dg-prune-output "using .vector_length \\(32\\)., ignoring 7" } */
  {
    for (COUNTERTYPE ii = 0; ii < N; ii++)
      c[ii] = a[ii] + b[ii];
  }

  /* Host-side verification of all three arrays.  */
  for (COUNTERTYPE i = 0; i < N; i++)
    {
      if (a[i] != i * 2)
	abort ();
      if (b[i] != i * 4)
	abort ();
      if (c[i] != a[i] + b[i])
	abort ();
    }

  free (a);
  free (b);
  free (c);

  return 0;
}