aboutsummaryrefslogtreecommitdiff
path: root/libgomp/testsuite/libgomp.oacc-c-c++-common/private-big-1.c
blob: c0e8db0c894ceaedb09bca8e68410b9de1bcc98f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
/* Test "big" private data.  */

/* { dg-additional-options -fno-inline } for stable results regarding OpenACC 'routine'.  */

/* { dg-additional-options -fopt-info-all-omp }
   { dg-additional-options --param=openacc-privatization=noisy }
   { dg-additional-options -foffload=-fopt-info-all-omp }
   { dg-additional-options -foffload=--param=openacc-privatization=noisy }
   for testing/documenting aspects of that functionality.  */

/* { dg-additional-options -Wopenacc-parallelism } for testing/documenting
   aspects of that functionality.  */

/* For GCN offloading compilation, we (expectedly) run into a
   'gang-private data-share memory exhausted' error: the default
   '-mgang-private-size' is too small.  Raise it so that 'uint32_t x[344]' plus
   some internal-use data fits in:
   { dg-additional-options -foffload-options=amdgcn-amdhsa=-mgang-private-size=1555 { target openacc_radeon_accel_selected } } */

/* It's only with Tcl 8.5 (released in 2007) that "the variable 'varName'
   passed to 'incr' may be unset, and in that case, it will be set to [...]",
   so to maintain compatibility with earlier Tcl releases, we manually
   initialize counter variables:
   { dg-line l_dummy[variable c_compute 0 c_loop 0] }
   { dg-message dummy {} { target iN-VAl-Id } l_dummy } to avoid
   "WARNING: dg-line var l_dummy defined, but not used".  */

#include <assert.h>
#include <stdint.h>


/* Based on 'private-variables.c:loop_g_5'.  */

/* To demonstrate PR105421 "GCN offloading, raised '-mgang-private-size':
   'HSA_STATUS_ERROR_MEMORY_APERTURE_VIOLATION'", a 'struct' indirection, for
   example, has been necessary in combination with a separate routine.  */

/* Argument bundle passed by address to 'loop_g_5_r'.  Keep the member order:
   the caller fills this in with a positional initializer.  */
struct data
{
  /* Values that get added into 'arr'; only read by 'loop_g_5_r'.  */
  uint32_t *x;
  /* Array that 'loop_g_5_r' updates in place.  */
  uint32_t *arr;
  /* Outer-loop index; 'loop_g_5_r' uses it to select the 320-element slice
     of 'arr' starting at 'i * 320'.  */
  uint32_t i;
};

/* OpenACC 'routine worker' helper for 'loop_g_5'.

   Unpacks 'x', 'arr' and 'i' from '*data', then for each 'j' in [0, 320)
   adds 'x[(i * 320 + j) % 344]' into 'arr[i * 320 + j]'.

   The local variable names and the placement of the DejaGnu comments are
   part of the test: the expected diagnostics quote the names and are
   attached to specific source lines.  */
#pragma acc routine worker
static void
loop_g_5_r(struct data *data)
{
  /* Copy the members into locals, so the loop below works on plain pointers
     and a plain index.  */
  uint32_t *x = data->x;
  uint32_t *arr = data->arr;
  uint32_t i = data->i;

#pragma acc loop /* { dg-line l_loop[incr c_loop] } */
  /* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop$c_loop } */
  /* { dg-optimized {assigned OpenACC worker vector loop parallelism} {} { target *-*-* } l_loop$c_loop } */
  /* The modulo keeps the 'x' index below 344 while 'i * 320 + j' ranges over
     the whole of 'arr'.  */
  for (int j = 0; j < 320; j++)
    arr[i * 320 + j] += x[(i * 320 + j) % 344];
}

/* The test case proper.

   Initializes 'arr[k] = k' for all 320 * 320 elements, then runs an OpenACC
   'parallel' region with 'arr' in a 'copy' clause.  Inside, a 'gang' loop over
   'i' in [0, 320) names the 344-element array 'x' in its 'private' clause,
   fills 'x[j] = j * (2 + i)', and calls 'loop_g_5_r' with 'x', 'arr' and 'i'
   wrapped in a 'struct data'.  Afterwards every element of 'arr' is checked
   with 'assert'.

   The local variable names and the placement of the DejaGnu comments are
   part of the test: the expected diagnostics quote the names and are
   attached to specific source lines.  */
void loop_g_5()
{
  uint32_t x[344], i, arr[320 * 320];

  /* Start from 'arr[k] == k', so the final check can tell what was added.  */
  for (i = 0; i < 320 * 320; i++)
    arr[i] = i;

  #pragma acc parallel copy(arr)
  {
    #pragma acc loop gang private(x) /* { dg-line l_loop[incr c_loop] } */
    /* { dg-note {variable 'x' in 'private' clause is candidate for adjusting OpenACC privatization level} {} { target *-*-* } l_loop$c_loop }
       { dg-note {variable 'x' ought to be adjusted for OpenACC privatization level: 'gang'} {} { target *-*-* } l_loop$c_loop }
       { dg-note {variable 'x' adjusted for OpenACC privatization level: 'gang'} {} { target { ! openacc_host_selected } } l_loop$c_loop } */
    /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop$c_loop } */
    /* { dg-note {variable 'data' declared in block is candidate for adjusting OpenACC privatization level} {} { target *-*-* } l_loop$c_loop }
       { dg-note {variable 'data' ought to be adjusted for OpenACC privatization level: 'gang'} {} { target *-*-* } l_loop$c_loop }
       { dg-note {variable 'data' adjusted for OpenACC privatization level: 'gang'} {} { target { ! openacc_host_selected } } l_loop$c_loop } */
    /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop$c_loop } */
    /* { dg-optimized {assigned OpenACC gang loop parallelism} {} { target *-*-* } l_loop$c_loop } */
    for (i = 0; i < 320; i++)
      {
        /* Fill 'x' with multiples of '2 + i' for this iteration.  */
        for (int j = 0; j < 344; j++)
	  x[j] = j * (2 + i);

	/* Pass the arguments through a 'struct' and a separate routine; the
	   PR105421 comment before 'struct data' explains why.  */
	struct data data = { x, arr, i };
	loop_g_5_r(&data); /* { dg-line l_compute[incr c_compute] } */
	/* { dg-optimized {assigned OpenACC worker vector loop parallelism} {} { target *-*-* } l_compute$c_compute } */
      }
  }

  /* Element 'i' belongs to outer iteration 'i / 320' and received
     'x[i % 344]', which that iteration set to '(i % 344) * (2 + i / 320)',
     on top of its initial value 'i'.  */
  for (i = 0; i < 320 * 320; i++)
    assert(arr[i] == i + (i % 344) * (2 + (i / 320)));
}


/* Program entry point: run the one test case.  A failed check inside
   'loop_g_5' terminates the program through 'assert'; otherwise report
   success.  */

int
main ()
{
  loop_g_5 ();
  return 0;
}