/* Source: libgomp/testsuite/libgomp.oacc-c-c++-common/broadcast-many.c
   (blob 08e026960e603d32016f800f14880e61f43a9199).  */
/* To avoid 'error: shared-memory region overflow':
   { dg-additional-options "-foffload-options=amdgcn-amdhsa=-mgang-private-size=64" { target openacc_radeon_accel_selected } }
*/

#include <assert.h>
#include <stdio.h>

/* Number of workers requested by the parallel region in main.  Nvidia
   targets use a smaller count to avoid
   'libgomp: The Nvidia accelerator has insufficient resources'.  */
#if !ACC_DEVICE_TYPE_nvidia
#define NUM_WORKERS 32
#else
#define NUM_WORKERS 28
#endif

/* LOCAL(var): declare one 'double' named VAR, initialised from a variable
   called 'input' that must be in scope where the macro is expanded.  */
#define LOCAL(var) double var = input;

/* LOCALS(pfx): declare the eight locals PFX1 .. PFX8.  */
#define LOCALS(pfx) \
  LOCAL(pfx##1) LOCAL(pfx##2) \
  LOCAL(pfx##3) LOCAL(pfx##4) \
  LOCAL(pfx##5) LOCAL(pfx##6) \
  LOCAL(pfx##7) LOCAL(pfx##8)

/* LOCALS2(pfx): declare the 64 locals PFXa1 .. PFXh8 (eight groups of
   eight).  */
#define LOCALS2(pfx) \
  LOCALS(pfx##a) LOCALS(pfx##b) \
  LOCALS(pfx##c) LOCALS(pfx##d) \
  LOCALS(pfx##e) LOCALS(pfx##f) \
  LOCALS(pfx##g) LOCALS(pfx##h)

/* USE(var): a plain read of VAR.  */
#define USE(var) var

/* USES(pfx, op): combine PFX1 .. PFX8 with the binary operator OP.  The
   expansion is deliberately left unparenthesised, as in a flat chain.  */
#define USES(pfx, op) \
  USE(pfx##1) op USE(pfx##2) op \
  USE(pfx##3) op USE(pfx##4) op \
  USE(pfx##5) op USE(pfx##6) op \
  USE(pfx##7) op USE(pfx##8)

/* USES2(pfx, op): combine all 64 locals PFXa1 .. PFXh8 with OP.  */
#define USES2(pfx, op) \
  USES(pfx##a, op) op USES(pfx##b, op) op \
  USES(pfx##c, op) op USES(pfx##d, op) op \
  USES(pfx##e, op) op USES(pfx##f, op) op \
  USES(pfx##g, op) op USES(pfx##h, op)

/* Run one OpenACC parallel region (a single gang, NUM_WORKERS workers) in
   which many 'double' locals are declared outside, and read inside, four
   worker-partitioned reduction loops, then check the accumulated sum.

   Every local is initialised from 'input' (1), so each loop iteration adds
   64 to 'w'; with 32 iterations per loop 'w' grows by 2048 per loop and
   ends at 8192.  The running value is printed after every loop and matched
   by the harness directive that follows each printf.

   NOTE(review): judging by the file name, the purpose is presumably to
   stress how many such locals have to be made available to the workers --
   confirm against the commit that introduced the test.

   Returns 0; a wrong final sum trips the assert instead.  */
int main (void)
{
  /* Written inside the parallel region; listed in its copyout clause.  */
  int ret;
  /* Initial value given to every local declared through LOCAL.  */
  int input = 1;

  #pragma acc parallel num_gangs(1) num_workers(NUM_WORKERS) copyout(ret)
  {
    /* Reduction target shared by all four loops; never reset in between.  */
    int w = 0;
    /* 64 locals ha1 .. hh8, each equal to 'input'.  */
    LOCALS2(h);

    /* Loop 1: each iteration sums the 64 h-locals (64) into 'w'.  */
    #pragma acc loop worker reduction(+:w)
    for (int i = 0; i < 32; i++)
      {
	int u = USES2(h,+);
	w += u;
      }

    printf ("w=%d\n", w);
    /* { dg-output "w=2048(\n|\r\n|\r)" } */

    /* 64 more locals ia1 .. ih8; the loop counter 'i' below is a separate
       name and does not clash with them.  */
    LOCALS2(i);

    /* Loop 2: same pattern over the i-locals.  */
    #pragma acc loop worker reduction(+:w)
    for (int i = 0; i < 32; i++)
      {
	int u = USES2(i,+);
	w += u;
      }

    printf ("w=%d\n", w);
    /* { dg-output "w=4096(\n|\r\n|\r)" } */

    /* Both the j- and k-locals are declared before loop 3, so the k-locals
       are already in scope during it even though only loop 4 reads them.  */
    LOCALS2(j);
    LOCALS2(k);

    /* Loop 3: same pattern over the j-locals.  */
    #pragma acc loop worker reduction(+:w)
    for (int i = 0; i < 32; i++)
      {
	int u = USES2(j,+);
	w += u;
      }

    printf ("w=%d\n", w);
    /* { dg-output "w=6144(\n|\r\n|\r)" } */

    /* Loop 4: same pattern over the k-locals.  */
    #pragma acc loop worker reduction(+:w)
    for (int i = 0; i < 32; i++)
      {
	int u = USES2(k,+);
	w += u;
      }

    /* 64 locals * 32 iterations * 4 loops == 8192.  */
    ret = (w == 64 * 32 * 4);
    printf ("w=%d\n", w);
    /* { dg-output "w=8192(\n|\r\n|\r)" } */
  }

  /* Host-side check of the flag computed inside the region.  */
  assert (ret);

  return 0;
}