1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
|
/* Test for omp_target_memcpy_async considering dependence objects. */
#include <omp.h>
#include <stdlib.h>
/* Exercise omp_target_memcpy_async with an array of two depend objects.

   Four copies into the same device buffer P are chained through
   dependences on P, so they are applied in program order:
     1. a[0..127] (values i + 1): task, depend(out: p)
     2. b[0..63]  (values i + 2): task, depend(inout: p)
     3. c[0..31]  (values i + 3): omp_target_memcpy_async with depend
	objects inout: p and in: c (c is filled by a separate task)
     4. e[0..15]  (values i + 4): task, depend(in: p)
   Every copy starts at offset 0, so each later one overwrites a prefix of
   the earlier ones.  Reading the buffer back must therefore give
   i + 4, i + 3, i + 2 and i + 1 in the index ranges [0,16), [16,32),
   [32,64) and [64,128) respectively.

   Returns 0 on success, and also when the device allocation fails (in
   which case nothing is tested).  Calls abort () when a copy reports
   failure or the read-back data is wrong.  */
int
main (void)
{
  int d = omp_get_default_device ();
  int id = omp_get_initial_device ();
  int a[128], b[64], c[32], e[16], q[128], i;
  void *p;

  /* Use the initial device when the default device number lies outside
     [0, omp_get_num_devices ()).  */
  if (d < 0 || d >= omp_get_num_devices ())
    d = id;

  /* The buffer holds 130 ints; only the first 128 are ever written.  */
  p = omp_target_alloc (130 * sizeof (int), d);
  if (p == NULL)
    return 0;

  for (i = 0; i < 128; ++i)
    a[i] = i + 1;
  for (i = 0; i < 64; ++i)
    b[i] = i + 2;
  /* c starts zeroed; its real contents are produced by a task below, so a
     copy that ignored the "in: c" dependence would transfer zeros.  */
  for (i = 0; i < 32; i++)
    c[i] = 0;
  for (i = 0; i < 16; i++)
    e[i] = i + 4;

  omp_depend_t obj[2];

  #pragma omp parallel num_threads(5)
  #pragma omp single
  {
    /* Copy 1: all of a.  */
    #pragma omp task depend(out: p)
    if (omp_target_memcpy (p, a, 128 * sizeof (int), 0, 0, d, id) != 0)
      abort ();

    /* Copy 2: b over the first 64 elements.  */
    #pragma omp task depend(inout: p)
    if (omp_target_memcpy (p, b, 64 * sizeof (int), 0, 0, d, id) != 0)
      abort ();

    /* Producer of c.  The index is local to the task: the enclosing i
       would be shared here.  */
    #pragma omp task depend(out: c)
    {
      int k;
      for (k = 0; k < 32; k++)
	c[k] = k + 3;
    }

    /* Copy 3: c over the first 32 elements, ordered after copy 2 (inout
       on p) and after the producer of c (in on c) via depend objects.  */
    #pragma omp depobj(obj[0]) depend(inout: p)
    #pragma omp depobj(obj[1]) depend(in: c)
    if (omp_target_memcpy_async (p, c, 32 * sizeof (int), 0, 0, d, id,
				 2, obj) != 0)
      abort ();

    /* Copy 4: e over the first 16 elements; the "in" dependence on p
       orders it after the asynchronous copy.  */
    #pragma omp task depend(in: p)
    if (omp_target_memcpy (p, e, 16 * sizeof (int), 0, 0, d, id) != 0)
      abort ();
  }

  /* NOTE(review): this taskwait sits after the parallel region has ended;
     presumably no child tasks are outstanding at this point -- confirm
     before removing it.  */
  #pragma omp taskwait

  /* The region that consumed the depend objects has finished, so release
     them.  */
  #pragma omp depobj(obj[0]) destroy
  #pragma omp depobj(obj[1]) destroy

  /* Read the device buffer back into a zeroed host array and verify the
     layered result described above.  */
  for (i = 0; i < 128; ++i)
    q[i] = 0;
  if (omp_target_memcpy (q, p, 128 * sizeof (int), 0, 0, id, d) != 0)
    abort ();

  for (i = 0; i < 16; ++i)
    if (q[i] != i + 4)
      abort ();
  for (i = 16; i < 32; ++i)
    if (q[i] != i + 3)
      abort ();
  for (i = 32; i < 64; ++i)
    if (q[i] != i + 2)
      abort ();
  for (i = 64; i < 128; ++i)
    if (q[i] != i + 1)
      abort ();

  omp_target_free (p, d);
  return 0;
}
|