/* Test for omp_target_memcpy_rect_async considering dependence objects. */ #include #include #define NUM_DIMS 3 int main () { int d = omp_get_default_device (); int id = omp_get_initial_device (); int a[128], b[64], c[128], e[16], q[128], i; void *p; if (d < 0 || d >= omp_get_num_devices ()) d = id; p = omp_target_alloc (130 * sizeof (int), d); if (p == NULL) return 0; for (i = 0; i < 128; i++) q[i] = 0; if (omp_target_memcpy (p, q, 128 * sizeof (int), 0, 0, d, id) != 0) abort (); size_t volume[NUM_DIMS] = { 2, 2, 3 }; size_t dst_offsets[NUM_DIMS] = { 0, 0, 0 }; size_t src_offsets[NUM_DIMS] = { 0, 0, 0 }; size_t dst_dimensions[NUM_DIMS] = { 3, 4, 5 }; size_t src_dimensions[NUM_DIMS] = { 2, 3, 4 }; for (i = 0; i < 128; i++) a[i] = 42; for (i = 0; i < 64; i++) b[i] = 24; for (i = 0; i < 128; i++) c[i] = 0; for (i = 0; i < 16; i++) e[i] = 77; omp_depend_t obj[2]; #pragma omp parallel num_threads(5) #pragma omp single { #pragma omp task depend (out: p) omp_target_memcpy (p, a, 128 * sizeof (int), 0, 0, d, id); #pragma omp task depend(inout: p) omp_target_memcpy (p, b, 64 * sizeof (int), 0, 0, d, id); #pragma omp task depend(out: c) for (i = 0; i < 128; i++) c[i] = i + 1; #pragma omp depobj(obj[0]) depend(inout: p) #pragma omp depobj(obj[1]) depend(in: c) /* This produces: 1 2 3 - - 5 6 7 - - at positions 0..9 and 13 14 15 - - 17 18 19 - - at positions 20..29. */ omp_target_memcpy_rect_async (p, c, sizeof (int), NUM_DIMS, volume, dst_offsets, src_offsets, dst_dimensions, src_dimensions, d, id, 2, obj); #pragma omp task depend(in: p) omp_target_memcpy (p, e, 16 * sizeof (int), 0, 0, d, id); } #pragma omp taskwait if (omp_target_memcpy (q, p, 128 * sizeof(int), 0, 0, id, d) != 0) abort (); for (i = 0; i < 16; ++i) if (q[i] != 77) abort (); if (q[20] != 13 || q[21] != 14 || q[22] != 15 || q[25] != 17 || q[26] != 18 || q[27] != 19) abort (); for (i = 28; i < 64; ++i) if (q[i] != 24) abort (); for (i = 64; i < 128; ++i) if (q[i] != 42) abort (); omp_target_free (p, d); return 0; }