/* Source path: root/libgomp/testsuite/libgomp.oacc-c-c++-common/pr87835.c
   Blob: e48f3074d58f0eac4913e359ce34c5506a0ad2aa  */
/* { dg-do run { target openacc_nvidia_accel_selected } } */
/* { dg-additional-options "-lcuda" } */
/* { dg-require-effective-target openacc_cuda } */

#include <openacc.h>
#include <stdlib.h>
#include "cuda.h"

#include <stdio.h>

/* Element count of the test arrays 'a' and 'c' declared in main.  */
#define n 128

/* Test driver.  Creates a CUDA stream, registers it with the OpenACC runtime
   for async value 1, then enqueues three 'parallel' regions with async (1)
   inside one data region.  After the region, every element of 'c' must be 6
   (3 + 3 from the initial contents of 'a'); otherwise the program aborts.
   Returns 0 on success.  */
int
main (void)
{
  CUresult r;
  CUstream stream1;
  /* Run-time copy of the array length; it is copied in to the device and used
     as the loop bound and index base of the second parallel region.  */
  int N = n;
  int a[n];
  int c[n];

  acc_init (acc_device_nvidia);

  /* Create a stream with the CU_STREAM_NON_BLOCKING flag and fail hard, with
     the driver error code on stderr, if that does not succeed.  */
  r = cuStreamCreate (&stream1, CU_STREAM_NON_BLOCKING);
  if (r != CUDA_SUCCESS)
    {
      fprintf (stderr, "cuStreamCreate failed: %d\n", r);
      abort ();
    }

  /* Register stream1 for async value 1, the value used by every async clause
     and by the wait directive below.
     NOTE(review): the return value is not checked here -- confirm whether a
     failure to register the stream should abort the test.  */
  acc_set_cuda_stream (1, stream1);

  /* Every a[i] is 3 and every c[i] is 0, so a correct run of the second
     parallel region below yields c[i] == 3 + 3 == 6.  */
  for (int i = 0; i < n; i++)
    {
      a[i] = 3;
      c[i] = 0;
    }

#pragma acc data copy (a, c) copyin (N)
  {
    /* First region: an empty parallel construct on async (1).  */
#pragma acc parallel async (1)
    ;

    /* Second region: c[ii] = a[ii] + a[N - ii - 1], as a gang loop.  Note that
       num_gangs (320) exceeds the N == 128 iterations of the loop.  */
#pragma acc parallel async (1) num_gangs (320)
    #pragma acc loop gang
    for (int ii = 0; ii < N; ii++)
      c[ii] = (a[ii] + a[N - ii - 1]);

    /* Third region: overwrite every a[ii] with 6, as a sequential loop.
       NOTE(review): this looks intended to expose mis-ordering on the async
       queue -- if these stores were visible to the second region, some c[ii]
       would differ from 6.  Confirm against the PR87835 report.  */
#pragma acc parallel async (1)
    #pragma acc loop seq
    for (int ii = 0; ii < n; ii++)
      a[ii] = 6;

    /* Wait for the async (1) work before the data region ends.  */
#pragma acc wait (1)
  }

  /* Only 'c' is verified; the final contents of 'a' are not checked.  */
  for (int i = 0; i < n; i++)
    if (c[i] != 6)
      abort ();

  return 0;
}