aboutsummaryrefslogtreecommitdiff
path: root/libgomp
diff options
context:
space:
mode:
authorChung-Lin Tang <cltang@codesourcery.com>2023-06-13 08:44:31 -0700
committerThomas Schwinge <thomas@codesourcery.com>2023-10-25 10:49:55 +0200
commit3a3596389c2e539cb8fd5dc5784a4e2afe193a2a (patch)
tree50039ace710890df8318e0762569e86afcd62c53 /libgomp
parentfa68e04e760bc971faa38ce9811a023ebe9d89bb (diff)
downloadgcc-3a3596389c2e539cb8fd5dc5784a4e2afe193a2a.zip
gcc-3a3596389c2e539cb8fd5dc5784a4e2afe193a2a.tar.gz
gcc-3a3596389c2e539cb8fd5dc5784a4e2afe193a2a.tar.bz2
OpenACC 2.7: Implement self clause for compute constructs
This patch implements the 'self' clause for compute constructs: parallel, kernels, and serial. This clause conditionally uses the local device (the host mult-core CPU) as the executing device of the compute region. The actual implementation of the "local device" device type inside libgomp (presumably using pthreads) is still not yet completed, so the libgomp side is still implemented the exact same as host-fallback mode. (so as of now, it essentially behaves like the 'if' clause with the condition inverted) gcc/c/ChangeLog: * c-parser.cc (c_parser_oacc_compute_clause_self): New function. (c_parser_oacc_all_clauses): Add new 'bool compute_p = false' parameter, add parsing of self clause when compute_p is true. (OACC_KERNELS_CLAUSE_MASK): Add PRAGMA_OACC_CLAUSE_SELF. (OACC_PARALLEL_CLAUSE_MASK): Likewise, (OACC_SERIAL_CLAUSE_MASK): Likewise. (c_parser_oacc_compute): Adjust call to c_parser_oacc_all_clauses to set compute_p argument to true. * c-typeck.cc (c_finish_omp_clauses): Add OMP_CLAUSE_SELF case. gcc/cp/ChangeLog: * parser.cc (cp_parser_oacc_compute_clause_self): New function. (cp_parser_oacc_all_clauses): Add new 'bool compute_p = false' parameter, add parsing of self clause when compute_p is true. (OACC_KERNELS_CLAUSE_MASK): Add PRAGMA_OACC_CLAUSE_SELF. (OACC_PARALLEL_CLAUSE_MASK): Likewise, (OACC_SERIAL_CLAUSE_MASK): Likewise. (cp_parser_oacc_compute): Adjust call to c_parser_oacc_all_clauses to set compute_p argument to true. * pt.cc (tsubst_omp_clauses): Add OMP_CLAUSE_SELF case. * semantics.cc (c_finish_omp_clauses): Add OMP_CLAUSE_SELF case, merged with OMP_CLAUSE_IF case. gcc/fortran/ChangeLog: * gfortran.h (typedef struct gfc_omp_clauses): Add self_expr field. * openmp.cc (enum omp_mask2): Add OMP_CLAUSE_SELF. (gfc_match_omp_clauses): Add handling for OMP_CLAUSE_SELF. (OACC_PARALLEL_CLAUSES): Add OMP_CLAUSE_SELF. (OACC_KERNELS_CLAUSES): Likewise. (OACC_SERIAL_CLAUSES): Likewise. (resolve_omp_clauses): Add handling for omp_clauses->self_expr. * trans-openmp.cc (gfc_trans_omp_clauses): Add handling of clauses->self_expr and building of OMP_CLAUSE_SELF tree clause. (gfc_split_omp_clauses): Add handling of self_expr field copy. gcc/ChangeLog: * gimplify.cc (gimplify_scan_omp_clauses): Add OMP_CLAUSE_SELF case. (gimplify_adjust_omp_clauses): Likewise. * omp-expand.cc (expand_omp_target): Add OMP_CLAUSE_SELF expansion code, * omp-low.cc (scan_sharing_clauses): Add OMP_CLAUSE_SELF case. * tree-core.h (enum omp_clause_code): Add OMP_CLAUSE_SELF enum. * tree-nested.cc (convert_nonlocal_omp_clauses): Add OMP_CLAUSE_SELF case. (convert_local_omp_clauses): Likewise. * tree-pretty-print.cc (dump_omp_clause): Add OMP_CLAUSE_SELF case. * tree.cc (omp_clause_num_ops): Add OMP_CLAUSE_SELF entry. (omp_clause_code_name): Likewise. * tree.h (OMP_CLAUSE_SELF_EXPR): New macro. gcc/testsuite/ChangeLog: * c-c++-common/goacc/self-clause-1.c: New test. * c-c++-common/goacc/self-clause-2.c: New test. * gfortran.dg/goacc/self.f95: New test. include/ChangeLog: * gomp-constants.h (GOACC_FLAG_LOCAL_DEVICE): New flag bit value. libgomp/ChangeLog: * oacc-parallel.c (GOACC_parallel_keyed): Add code to handle GOACC_FLAG_LOCAL_DEVICE case. * testsuite/libgomp.oacc-c-c++-common/self-1.c: New test.
Diffstat (limited to 'libgomp')
-rw-r--r--libgomp/oacc-parallel.c11
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/self-1.c962
2 files changed, 973 insertions, 0 deletions
diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c
index 363e665..cf37a1b 100644
--- a/libgomp/oacc-parallel.c
+++ b/libgomp/oacc-parallel.c
@@ -193,6 +193,17 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
goacc_restore_bind ();
goto out_prof;
}
+ else if (flags & GOACC_FLAG_LOCAL_DEVICE)
+ {
+ /* TODO: a proper pthreads based "multi-core CPU" local device
+ implementation. Currently, this is still the same as host-fallback. */
+ prof_info.device_type = acc_device_host;
+ api_info.device_type = prof_info.device_type;
+ goacc_save_and_set_bind (acc_device_host);
+ fn (hostaddrs);
+ goacc_restore_bind ();
+ goto out_prof;
+ }
else if (acc_device_type (acc_dev->type) == acc_device_host)
{
fn (hostaddrs);
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/self-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/self-1.c
new file mode 100644
index 0000000..752e16e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/self-1.c
@@ -0,0 +1,962 @@
+#include <openacc.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#define N 32
+
+int
+main(int argc, char **argv)
+{
+ float *a, *b, *d_a, *d_b, exp, exp2;
+ int i;
+ const int one = 1;
+ const int zero = 0;
+ int n;
+
+ a = (float *) malloc (N * sizeof (float));
+ b = (float *) malloc (N * sizeof (float));
+ d_a = (float *) acc_malloc (N * sizeof (float));
+ d_b = (float *) acc_malloc (N * sizeof (float));
+
+ for (i = 0; i < N; i++)
+ a[i] = 4.0;
+
+#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) self(0)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+#if ACC_MEM_SHARED
+ exp = 5.0;
+#else
+ exp = 4.0;
+#endif
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != exp)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 16.0;
+
+#pragma acc parallel self(1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 17.0)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 8.0;
+
+#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) self(!one)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+#if ACC_MEM_SHARED
+ exp = 9.0;
+#else
+ exp = 8.0;
+#endif
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != exp)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 22.0;
+
+#pragma acc parallel self(!zero)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 23.0)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 16.0;
+
+#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) self(false)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+#if ACC_MEM_SHARED
+ exp = 17.0;
+#else
+ exp = 16.0;
+#endif
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != exp)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 76.0;
+
+#pragma acc parallel self(true)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 77.0)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 22.0;
+
+ n = 1;
+
+#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) self(!n)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+#if ACC_MEM_SHARED
+ exp = 23.0;
+#else
+ exp = 22.0;
+#endif
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != exp)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 18.0;
+
+ n = 0;
+
+#pragma acc parallel self(!n)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 19.0)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 49.0;
+
+ n = 1;
+
+#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) self(!(n + n))
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+#if ACC_MEM_SHARED
+ exp = 50.0;
+#else
+ exp = 49.0;
+#endif
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != exp)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 38.0;
+
+ n = 0;
+
+#pragma acc parallel self(!(n + n))
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 39.0)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 91.0;
+
+#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) self(!(-2))
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+#if ACC_MEM_SHARED
+ exp = 92.0;
+#else
+ exp = 91.0;
+#endif
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != exp)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 43.0;
+
+#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) self(one != 1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+#if ACC_MEM_SHARED
+ exp = 44.0;
+#else
+ exp = 43.0;
+#endif
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != exp)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 87.0;
+
+#pragma acc parallel self(one != 0)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 88.0)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 3.0;
+ b[i] = 9.0;
+ }
+
+#if ACC_MEM_SHARED
+ exp = 0.0;
+ exp2 = 0.0;
+#else
+ acc_map_data (a, d_a, N * sizeof (float));
+ acc_map_data (b, d_b, N * sizeof (float));
+ exp = 3.0;
+ exp2 = 9.0;
+#endif
+
+#pragma acc update device(a[0:N], b[0:N]) if(1)
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 0.0;
+ b[i] = 0.0;
+ }
+
+#pragma acc update host(a[0:N], b[0:N]) if(1)
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != exp)
+ abort();
+
+ if (b[i] != exp2)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 6.0;
+ b[i] = 12.0;
+ }
+
+#pragma acc update device(a[0:N], b[0:N]) if(0)
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 0.0;
+ b[i] = 0.0;
+ }
+
+#pragma acc update host(a[0:N], b[0:N]) if(1)
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != exp)
+ abort();
+
+ if (b[i] != exp2)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 26.0;
+ b[i] = 21.0;
+ }
+
+#pragma acc update device(a[0:N], b[0:N]) if(1)
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 0.0;
+ b[i] = 0.0;
+ }
+
+#pragma acc update host(a[0:N], b[0:N]) if(0)
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 0.0)
+ abort();
+
+ if (b[i] != 0.0)
+ abort();
+ }
+
+#if !ACC_MEM_SHARED
+ acc_unmap_data (a);
+ acc_unmap_data (b);
+#endif
+
+ acc_free (d_a);
+ acc_free (d_b);
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 4.0;
+ b[i] = 0.0;
+ }
+
+#pragma acc data copyin(a[0:N]) copyout(b[0:N]) if(1)
+{
+#pragma acc parallel present(a[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ b[ii] = a[ii];
+ }
+ }
+}
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 4.0)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 8.0;
+ b[i] = 1.0;
+ }
+
+#pragma acc data copyin(a[0:N]) copyout(b[0:N]) if(0)
+{
+#if !ACC_MEM_SHARED
+ if (acc_is_present (a, N * sizeof (float)))
+ abort ();
+#endif
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b, N * sizeof (float)))
+ abort ();
+#endif
+}
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 18.0;
+ b[i] = 21.0;
+ }
+
+#pragma acc data copyin(a[0:N]) if(1)
+{
+#if !ACC_MEM_SHARED
+ if (!acc_is_present (a, N * sizeof (float)))
+ abort ();
+#endif
+
+#pragma acc data copyout(b[0:N]) if(0)
+ {
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b, N * sizeof (float)))
+ abort ();
+#endif
+
+#pragma acc data copyout(b[0:N]) if(1)
+ {
+#pragma acc parallel present(a[0:N]) present(b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ b[ii] = a[ii];
+ }
+ }
+ }
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b, N * sizeof (float)))
+ abort ();
+#endif
+ }
+}
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 18.0)
+ abort ();
+ }
+
+#pragma acc enter data copyin (b[0:N]) if (0)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b, N * sizeof (float)))
+ abort ();
+#endif
+
+#pragma acc exit data delete (b[0:N]) if (0)
+
+#pragma acc enter data copyin (b[0:N]) if (1)
+
+#if !ACC_MEM_SHARED
+ if (!acc_is_present (b, N * sizeof (float)))
+ abort ();
+#endif
+
+#pragma acc exit data delete (b[0:N]) if (1)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b, N * sizeof (float)))
+ abort ();
+#endif
+
+#pragma acc enter data copyin (b[0:N]) if (zero)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b, N * sizeof (float)))
+ abort ();
+#endif
+
+#pragma acc exit data delete (b[0:N]) if (zero)
+
+#pragma acc enter data copyin (b[0:N]) if (one)
+
+#if !ACC_MEM_SHARED
+ if (!acc_is_present (b, N * sizeof (float)))
+ abort ();
+#endif
+
+#pragma acc exit data delete (b[0:N]) if (one)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b, N * sizeof (float)))
+ abort ();
+#endif
+
+#pragma acc enter data copyin (b[0:N]) if (one == 0)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b, N * sizeof (float)))
+ abort ();
+#endif
+
+#pragma acc exit data delete (b[0:N]) if (one == 0)
+
+#pragma acc enter data copyin (b[0:N]) if (one == 1)
+
+#if !ACC_MEM_SHARED
+ if (!acc_is_present (b, N * sizeof (float)))
+ abort ();
+#endif
+
+#pragma acc exit data delete (b[0:N]) if (one == 1)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b, N * sizeof (float)))
+ abort ();
+#endif
+
+ for (i = 0; i < N; i++)
+ a[i] = 4.0;
+
+#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) self(0)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+#if ACC_MEM_SHARED
+ exp = 5.0;
+#else
+ exp = 4.0;
+#endif
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != exp)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 16.0;
+
+#pragma acc kernels self(1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 17.0)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 8.0;
+
+#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) self(!one)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+#if ACC_MEM_SHARED
+ exp = 9.0;
+#else
+ exp = 8.0;
+#endif
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != exp)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 22.0;
+
+#pragma acc kernels self(!zero)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 23.0)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 16.0;
+
+#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) self(false)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+#if ACC_MEM_SHARED
+ exp = 17.0;
+#else
+ exp = 16.0;
+#endif
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != exp)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 76.0;
+
+#pragma acc kernels self(true)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 77.0)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 22.0;
+
+ n = 1;
+
+#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) self(!n)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+#if ACC_MEM_SHARED
+ exp = 23.0;
+#else
+ exp = 22.0;
+#endif
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != exp)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 18.0;
+
+ n = 0;
+
+#pragma acc kernels self(!n)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 19.0)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 49.0;
+
+ n = 1;
+
+#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) self((n + n) == 0)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+#if ACC_MEM_SHARED
+ exp = 50.0;
+#else
+ exp = 49.0;
+#endif
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != exp)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 38.0;
+
+ n = 0;
+
+#pragma acc kernels self(!(n + n))
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 39.0)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 91.0;
+
+#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) self(!(-2))
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+#if ACC_MEM_SHARED
+ exp = 92.0;
+#else
+ exp = 91.0;
+#endif
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != exp)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 43.0;
+
+#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) self(one != 1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+#if ACC_MEM_SHARED
+ exp = 44.0;
+#else
+ exp = 43.0;
+#endif
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != exp)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 87.0;
+
+#pragma acc kernels self(one != 0)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 88.0)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 3.0;
+ b[i] = 9.0;
+ }
+
+#if ACC_MEM_SHARED
+ exp = 0.0;
+ exp2 = 0.0;
+#else
+ acc_map_data (a, d_a, N * sizeof (float));
+ acc_map_data (b, d_b, N * sizeof (float));
+ exp = 3.0;
+ exp2 = 9.0;
+#endif
+
+ return 0;
+}