diff options
author | Thomas Koenig <tkoenig@gcc.gnu.org> | 2021-01-03 21:40:04 +0100 |
---|---|---|
committer | Thomas Koenig <tkoenig@gcc.gnu.org> | 2021-01-03 21:40:04 +0100 |
commit | afae4a55ccaa0de95ea11e5f634084db6ab2f444 (patch) | |
tree | d632cc867d10410ba9fb750523be790b86846ac4 /libgomp | |
parent | 9d9a82ec8478ff52c7a9d61f58cd2a7b6295b5f9 (diff) | |
parent | d2eb616a0f7bea78164912aa438c29fe1ef5774a (diff) | |
download | gcc-afae4a55ccaa0de95ea11e5f634084db6ab2f444.zip gcc-afae4a55ccaa0de95ea11e5f634084db6ab2f444.tar.gz gcc-afae4a55ccaa0de95ea11e5f634084db6ab2f444.tar.bz2 |
Merge branch 'master' into devel/coarray_native
Diffstat (limited to 'libgomp')
66 files changed, 1653 insertions, 226 deletions
diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index 35b6689..1d8355d 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,236 @@ +2020-12-28 Gerald Pfeifer <gerald@pfeifer.com> + + * libgomp.texi (Top): Avoid bad "up" link. + +2020-12-18 Jakub Jelinek <jakub@redhat.com> + + * testsuite/libgomp.c/task-6.c: New test. + +2020-12-09 Andrew Stubbs <ams@codesourcery.com> + + * plugin/plugin-gcn.c: Include hsa_ext_amd.h. + (HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT): Delete redundant definition. + +2020-12-08 Tobias Burnus <tobias@codesourcery.com> + + * testsuite/libgomp.fortran/scan-1.f90: New test. + +2020-12-05 Iain Sandoe <iain@sandoe.co.uk> + + PR target/97865 + * configure: Regenerate. + +2020-11-29 John David Anglin <danglin@gcc.gnu.org> + + * configure: Regenerate. + +2020-11-25 Thomas Schwinge <thomas@codesourcery.com> + + * testsuite/libgomp.oacc-c++/cache-1.C: New. + * testsuite/libgomp.oacc-c-c++-common/cache-1.c: Update. + +2020-11-25 Andrew Stubbs <ams@codesourcery.com> + + * testsuite/libgomp.oacc-fortran/atomic_capture-1.f90 (main): Adjust + expected results. + +2020-11-24 Andrew Stubbs <ams@codesourcery.com> + + * plugin/plugin-gcn.c: Don't redefine relocations if elf.h has them. + (reserved): Delete unused define. + +2020-11-24 Thomas Schwinge <thomas@codesourcery.com> + + * testsuite/libgomp.oacc-c-c++-common/kernels-decompose-1.c: Avoid + Tcl 8.5-specific behavior. + * testsuite/libgomp.oacc-fortran/pr94358-1.f90: Likewise. + +2020-11-18 Kwok Cheung Yeung <kcy@codesourcery.com> + + * env.c (gomp_global_icv): Remove nest_var field. Add + max_active_levels_var field. + (gomp_max_active_levels_var): Remove. + (parse_boolean): Return true on success. + (handle_omp_display_env): Express OMP_NESTED in terms of + max_active_levels_var. Change format specifier for + max_active_levels_var. + (initialize_env): Set max_active_levels_var from + OMP_MAX_ACTIVE_LEVELS, OMP_NESTED, OMP_NUM_THREADS and + OMP_PROC_BIND. + * icv.c (omp_set_nested): Express in terms of + max_active_levels_var. + (omp_get_nested): Likewise. + (omp_set_max_active_levels): Use max_active_levels_var field instead + of gomp_max_active_levels_var. + (omp_get_max_active_levels): Likewise. + * libgomp.h (struct gomp_task_icv): Remove nest_var field. Add + max_active_levels_var field. + (gomp_supported_active_levels): Set to UCHAR_MAX. + (gomp_max_active_levels_var): Delete. + * libgomp.texi (omp_get_nested): Update documentation. + (omp_set_nested): Likewise. + (OMP_MAX_ACTIVE_LEVELS): Likewise. + (OMP_NESTED): Likewise. + (OMP_NUM_THREADS): Likewise. + (OMP_PROC_BIND): Likewise. + * parallel.c (gomp_resolve_num_threads): Replace reference + to nest_var with max_active_levels_var. Use max_active_levels_var + field instead of gomp_max_active_levels_var. + +2020-11-18 Tobias Burnus <tobias@codesourcery.com> + + * testsuite/libgomp.c/usleep.h (fallback_usleep): Renamed from + nvptx_usleep; use also for device={arch(gcn)}. + +2020-11-14 Jakub Jelinek <jakub@redhat.com> + + * testsuite/libgomp.c-c++-common/allocate-1.c (struct S): New type. + (foo): Add tests for non-VLA private and firstprivate clauses on + omp task. + (bar): Likewise. Remove taking of address from private/firstprivate + variables. + * testsuite/libgomp.c++/allocate-1.C (struct S): New type. + (foo): Add p, q, px and s arguments. Add tests for array reductions + and for non-VLA private and firstprivate clauses on omp task. + (bar): Removed. + (main): Adjust foo caller. Don't call bar. + +2020-11-13 Gergö Barany <gergo@codesourcery.com> + Thomas Schwinge <thomas@codesourcery.com> + + * testsuite/libgomp.oacc-c-c++-common/declare-vla-kernels-decompose-ice-1.c: + New. + * testsuite/libgomp.oacc-c-c++-common/declare-vla-kernels-decompose.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/kernels-decompose-1.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/declare-vla.c: Adjust. + * testsuite/libgomp.oacc-fortran/pr94358-1.f90: Likewise. + +2020-11-13 Gergö Barany <gergo@codesourcery.com> + Thomas Schwinge <thomas@codesourcery.com> + + PR fortran/94358 + * testsuite/libgomp.oacc-fortran/pr94358-1.f90: New. + +2020-11-13 Jakub Jelinek <jakub@redhat.com> + + * testsuite/libgomp.c-c++-common/allocate-1.c (foo): Add tests + for array reductions. + (main): Adjust foo callers. + +2020-11-12 Jakub Jelinek <jakub@redhat.com> + + * libgomp.map (GOMP_alloc, GOMP_free): Export at GOMP_5.0.1. + * omp.h.in (omp_alloc): Add malloc and alloc_size attributes. + * libgomp_g.h (GOMP_alloc, GOMP_free): Declare. + * allocator.c (omp_aligned_alloc): New for now static function, + add alignment argument and handle it. + (omp_alloc): Reimplement using omp_aligned_alloc. + (GOMP_alloc, GOMP_free): New functions. + (omp_free): Add ialias. + * testsuite/libgomp.c-c++-common/allocate-1.c: New test. + * testsuite/libgomp.c++/allocate-1.C: New test. + +2020-11-12 Thomas Schwinge <thomas@codesourcery.com> + + PR fortran/97782 + * testsuite/libgomp.oacc-fortran/attach-descriptor-1.f90: Adjust. + +2020-11-10 Chung-Lin Tang <cltang@codesourcery.com> + + * libgomp.h (enum gomp_map_vars_kind): Adjust enum values to be bit-flag + usable. + * oacc-mem.c (acc_map_data): Adjust gomp_map_vars argument flags to + 'GOMP_MAP_VARS_OPENACC | GOMP_MAP_VARS_ENTER_DATA'. + (goacc_enter_datum): Likewise for call to gomp_map_vars_async. + (goacc_enter_data_internal): Likewise. + * target.c (gomp_map_vars_internal): + Change checks of GOMP_MAP_VARS_ENTER_DATA to use bit-and (&). Adjust use + of gomp_attach_pointer for OpenMP cases. + (gomp_exit_data): Add handling of GOMP_MAP_DETACH. + (GOMP_target_enter_exit_data): Add handling of GOMP_MAP_ATTACH. + * testsuite/libgomp.c-c++-common/ptr-attach-1.c: New testcase. + +2020-11-05 Ulrich Drepper <drepper@redhat.com> + Kwok Cheung Yeung <kcy@codesourcery.com> + + * Makefile.am (%.mod): Add -cpp and -fopenmp to compile flags. + * Makefile.in: Regenerate. + * fortran.c: Wrap uses of omp_set_nested and omp_get_nested with + pragmas to ignore -Wdeprecated-declarations warnings. + * icv.c: Likewise. + * omp.h.in (__GOMP_DEPRECATED_5_0): Define. + Mark omp_lock_hint_* enum values, omp_lock_hint_t, omp_set_nested, + and omp_get_nested with __GOMP_DEPRECATED_5_0. + * omp_lib.f90.in: Mark omp_get_nested and omp_set_nested as + deprecated. + * testsuite/libgomp.c++/affinity-1.C: Add -Wno-deprecated-declarations + to test options. + * testsuite/libgomp.c/affinity-1.c: Likewise. + * testsuite/libgomp.c/affinity-2.c: Likewise. + * testsuite/libgomp.c/appendix-a/a.15.1.c: Likewise. + * testsuite/libgomp.c/lib-1.c: Likewise. + * testsuite/libgomp.c/nested-1.c: Likewise. + * testsuite/libgomp.c/nested-2.c: Likewise. + * testsuite/libgomp.c/nested-3.c: Likewise. + * testsuite/libgomp.c/pr32362-1.c: Likewise. + * testsuite/libgomp.c/pr32362-2.c: Likewise. + * testsuite/libgomp.c/pr32362-3.c: Likewise. + * testsuite/libgomp.c/pr35549.c: Likewise. + * testsuite/libgomp.c/pr42942.c: Likewise. + * testsuite/libgomp.c/pr61200.c: Likewise. + * testsuite/libgomp.c/sort-1.c: Likewise. + * testsuite/libgomp.c/target-5.c: Likewise. + * testsuite/libgomp.c/target-6.c: Likewise. + * testsuite/libgomp.c/teams-1.c: Likewise. + * testsuite/libgomp.c/thread-limit-1.c: Likewise. + * testsuite/libgomp.c/thread-limit-2.c: Likewise. + * testsuite/libgomp.c/thread-limit-4.c: Likewise. + * testsuite/libgomp.fortran/affinity1.f90: Likewise. + * testsuite/libgomp.fortran/lib1.f90: Likewise. + * testsuite/libgomp.fortran/lib2.f: Likewise. + * testsuite/libgomp.fortran/nested1.f90: Likewise. + * testsuite/libgomp.fortran/teams1.f90: Likewise. + +2020-11-02 Thomas Schwinge <thomas@codesourcery.com> + + PR target/85486 + * testsuite/libgomp.oacc-c-c++-common/pr85486-2.c: Simplify and enhance. + * testsuite/libgomp.oacc-c-c++-common/pr85486-3.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/pr85486.c: Likewise. + +2020-11-02 Thomas Schwinge <thomas@codesourcery.com> + + PR testsuite/80219 + PR testsuite/85303 + * testsuite/lib/libgomp.exp (libgomp_init): Set + 'gcc_warning_prefix', 'gcc_error_prefix'. + +2020-10-30 Jakub Jelinek <jakub@redhat.com> + + * target.c (gomp_map_vars_internal): Use FIELD_TGT_EMPTY macro + even in field_tgt_clear initializer. + +2020-10-28 Jakub Jelinek <jakub@redhat.com> + + * testsuite/libgomp.c/target-42.c: New test. + +2020-10-28 Jakub Jelinek <jakub@redhat.com> + Tom de Vries <tdevries@suse.de> + + PR testsuite/81690 + * testsuite/libgomp.c/usleep.h: New file. + * testsuite/libgomp.c/target-32.c: Include usleep.h. + (main): Use tgt_usleep instead of usleep. + * testsuite/libgomp.c/thread-limit-2.c: Include usleep.h. + (main): Use tgt_usleep instead of usleep. + +2020-10-28 Jakub Jelinek <jakub@redhat.com> + + PR lto/96680 + * testsuite/libgomp.c/declare-variant-1.c: New test. + 2020-10-22 Jakub Jelinek <jakub@redhat.com> * testsuite/libgomp.c/target-41.c: New test. diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am index 586c930..4cf1f58 100644 --- a/libgomp/Makefile.am +++ b/libgomp/Makefile.am @@ -92,7 +92,7 @@ openacc_kinds.mod: openacc.mod openacc.mod: openacc.lo : %.mod: %.f90 - $(FC) $(FCFLAGS) -fsyntax-only $< + $(FC) $(FCFLAGS) -cpp -fopenmp -fsyntax-only $< fortran.lo: libgomp_f.h fortran.o: libgomp_f.h env.lo: libgomp_f.h diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in index 00d5e29..eb868b3 100644 --- a/libgomp/Makefile.in +++ b/libgomp/Makefile.in @@ -1382,7 +1382,7 @@ openacc_kinds.mod: openacc.mod openacc.mod: openacc.lo : %.mod: %.f90 - $(FC) $(FCFLAGS) -fsyntax-only $< + $(FC) $(FCFLAGS) -cpp -fopenmp -fsyntax-only $< fortran.lo: libgomp_f.h fortran.o: libgomp_f.h env.lo: libgomp_f.h diff --git a/libgomp/allocator.c b/libgomp/allocator.c index 7166538..2790733 100644 --- a/libgomp/allocator.c +++ b/libgomp/allocator.c @@ -205,11 +205,12 @@ omp_destroy_allocator (omp_allocator_handle_t allocator) ialias (omp_init_allocator) ialias (omp_destroy_allocator) -void * -omp_alloc (size_t size, omp_allocator_handle_t allocator) +static void * +omp_aligned_alloc (size_t alignment, size_t size, + omp_allocator_handle_t allocator) { struct omp_allocator_data *allocator_data; - size_t alignment, new_size; + size_t new_size; void *ptr, *ret; if (__builtin_expect (size == 0, 0)) @@ -227,12 +228,14 @@ retry: if (allocator > omp_max_predefined_alloc) { allocator_data = (struct omp_allocator_data *) allocator; - alignment = allocator_data->alignment; + if (alignment < allocator_data->alignment) + alignment = allocator_data->alignment; } else { allocator_data = NULL; - alignment = sizeof (void *); + if (alignment < sizeof (void *)) + alignment = sizeof (void *); } new_size = sizeof (struct omp_mem_header); @@ -339,6 +342,27 @@ fail: return NULL; } +void * +omp_alloc (size_t size, omp_allocator_handle_t allocator) +{ + return omp_aligned_alloc (1, size, allocator); +} + +/* Like omp_aligned_alloc, but apply on top of that: + "For allocations that arise from this ... the null_fb value of the + fallback allocator trait behaves as if the abort_fb had been specified." */ + +void * +GOMP_alloc (size_t alignment, size_t size, uintptr_t allocator) +{ + void *ret = omp_aligned_alloc (alignment, size, + (omp_allocator_handle_t) allocator); + if (__builtin_expect (ret == NULL, 0) && size) + gomp_fatal ("Out of memory allocating %lu bytes", + (unsigned long) size); + return ret; +} + void omp_free (void *ptr, omp_allocator_handle_t allocator) { @@ -366,3 +390,11 @@ omp_free (void *ptr, omp_allocator_handle_t allocator) } free (data->ptr); } + +ialias (omp_free) + +void +GOMP_free (void *ptr, uintptr_t allocator) +{ + return omp_free (ptr, (omp_allocator_handle_t) allocator); +} diff --git a/libgomp/configure b/libgomp/configure index e48371d..d412596 100755 --- a/libgomp/configure +++ b/libgomp/configure @@ -7631,23 +7631,25 @@ _LT_EOF fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_force_load" >&5 $as_echo "$lt_cv_ld_force_load" >&6; } - case $host_os in - rhapsody* | darwin1.[012]) + # Allow for Darwin 4-7 (macOS 10.0-10.3) although these are not expect to + # build without first building modern cctools / linker. + case $host_cpu-$host_os in + *-rhapsody* | *-darwin1.[012]) _lt_dar_allow_undefined='${wl}-undefined ${wl}suppress' ;; - darwin1.*) + *-darwin1.*) _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;; - darwin*) # darwin 5.x on - # if running on 10.5 or later, the deployment target defaults - # to the OS version, if on x86, and 10.4, the deployment - # target defaults to 10.4. Don't you love it? - case ${MACOSX_DEPLOYMENT_TARGET-10.0},$host in - 10.0,*86*-darwin8*|10.0,*-darwin[91]*) - _lt_dar_allow_undefined='${wl}-undefined ${wl}dynamic_lookup' ;; + *-darwin*) + # darwin 5.x (macOS 10.1) onwards we only need to adjust when the + # deployment target is forced to an earlier version. + case ${MACOSX_DEPLOYMENT_TARGET-UNSET},$host in + UNSET,*-darwin[89]*|UNSET,*-darwin[12][0123456789]*) + ;; 10.[012][,.]*) - _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;; - 10.*) - _lt_dar_allow_undefined='${wl}-undefined ${wl}dynamic_lookup' ;; - esac + _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' + ;; + *) + ;; + esac ;; esac if test "$lt_cv_apple_cc_single_mod" = "yes"; then @@ -9713,7 +9715,7 @@ if test -z "$aix_libpath"; then aix_libpath="/usr/lib:/lib"; fi if test "$GCC" = yes && test "$with_gnu_ld" = no; then case $host_cpu in hppa*64*) - archive_cmds='$CC -shared ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_cmds='$CC -shared ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' ;; ia64*) archive_cmds='$CC -shared -fPIC ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' @@ -9725,7 +9727,7 @@ if test -z "$aix_libpath"; then aix_libpath="/usr/lib:/lib"; fi else case $host_cpu in hppa*64*) - archive_cmds='$CC -b ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_cmds='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' ;; ia64*) archive_cmds='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' @@ -11429,7 +11431,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 11432 "configure" +#line 11434 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -11535,7 +11537,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 11538 "configure" +#line 11540 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -13571,7 +13573,7 @@ if test -z "$aix_libpath"; then aix_libpath="/usr/lib:/lib"; fi if test "$GCC" = yes && test "$with_gnu_ld" = no; then case $host_cpu in hppa*64*) - archive_cmds_FC='$CC -shared ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_cmds_FC='$CC -shared ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' ;; ia64*) archive_cmds_FC='$CC -shared -fPIC ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' @@ -13583,7 +13585,7 @@ if test -z "$aix_libpath"; then aix_libpath="/usr/lib:/lib"; fi else case $host_cpu in hppa*64*) - archive_cmds_FC='$CC -b ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_cmds_FC='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' ;; ia64*) archive_cmds_FC='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' diff --git a/libgomp/env.c b/libgomp/env.c index ab22525..5a49ae6 100644 --- a/libgomp/env.c +++ b/libgomp/env.c @@ -68,12 +68,11 @@ struct gomp_task_icv gomp_global_icv = { .run_sched_chunk_size = 1, .default_device_var = 0, .dyn_var = false, - .nest_var = false, + .max_active_levels_var = 1, .bind_var = omp_proc_bind_false, .target_data = NULL }; -unsigned long gomp_max_active_levels_var = gomp_supported_active_levels; bool gomp_cancel_var = false; enum gomp_target_offload_t gomp_target_offload_var = GOMP_TARGET_OFFLOAD_DEFAULT; @@ -959,16 +958,17 @@ parse_spincount (const char *name, unsigned long long *pvalue) } /* Parse a boolean value for environment variable NAME and store the - result in VALUE. */ + result in VALUE. Return true if one was present and it was + successfully parsed. */ -static void +static bool parse_boolean (const char *name, bool *value) { const char *env; env = getenv (name); if (env == NULL) - return; + return false; while (isspace ((unsigned char) *env)) ++env; @@ -987,7 +987,11 @@ parse_boolean (const char *name, bool *value) while (isspace ((unsigned char) *env)) ++env; if (*env != '\0') - gomp_error ("Invalid value for environment variable %s", name); + { + gomp_error ("Invalid value for environment variable %s", name); + return false; + } + return true; } /* Parse the OMP_WAIT_POLICY environment variable and return the value. */ @@ -1252,7 +1256,7 @@ handle_omp_display_env (unsigned long stacksize, int wait_policy) fprintf (stderr, " OMP_DYNAMIC = '%s'\n", gomp_global_icv.dyn_var ? "TRUE" : "FALSE"); fprintf (stderr, " OMP_NESTED = '%s'\n", - gomp_global_icv.nest_var ? "TRUE" : "FALSE"); + gomp_global_icv.max_active_levels_var > 1 ? "TRUE" : "FALSE"); fprintf (stderr, " OMP_NUM_THREADS = '%lu", gomp_global_icv.nthreads_var); for (i = 1; i < gomp_nthreads_var_list_len; i++) @@ -1344,8 +1348,8 @@ handle_omp_display_env (unsigned long stacksize, int wait_policy) wait_policy > 0 ? "ACTIVE" : "PASSIVE"); fprintf (stderr, " OMP_THREAD_LIMIT = '%u'\n", gomp_global_icv.thread_limit_var); - fprintf (stderr, " OMP_MAX_ACTIVE_LEVELS = '%lu'\n", - gomp_max_active_levels_var); + fprintf (stderr, " OMP_MAX_ACTIVE_LEVELS = '%u'\n", + gomp_global_icv.max_active_levels_var); fprintf (stderr, " OMP_CANCELLATION = '%s'\n", gomp_cancel_var ? "TRUE" : "FALSE"); @@ -1410,6 +1414,7 @@ static void __attribute__((constructor)) initialize_env (void) { unsigned long thread_limit_var, stacksize = GOMP_DEFAULT_STACKSIZE; + unsigned long max_active_levels_var; int wait_policy; /* Do a compile time check that mkomp_h.pl did good job. */ @@ -1417,16 +1422,11 @@ initialize_env (void) parse_schedule (); parse_boolean ("OMP_DYNAMIC", &gomp_global_icv.dyn_var); - parse_boolean ("OMP_NESTED", &gomp_global_icv.nest_var); parse_boolean ("OMP_CANCELLATION", &gomp_cancel_var); parse_boolean ("OMP_DISPLAY_AFFINITY", &gomp_display_affinity_var); parse_int ("OMP_DEFAULT_DEVICE", &gomp_global_icv.default_device_var, true); parse_target_offload ("OMP_TARGET_OFFLOAD", &gomp_target_offload_var); parse_int ("OMP_MAX_TASK_PRIORITY", &gomp_max_task_priority_var, true); - parse_unsigned_long ("OMP_MAX_ACTIVE_LEVELS", &gomp_max_active_levels_var, - true); - if (gomp_max_active_levels_var > gomp_supported_active_levels) - gomp_max_active_levels_var = gomp_supported_active_levels; gomp_def_allocator = parse_allocator (); if (parse_unsigned_long ("OMP_THREAD_LIMIT", &thread_limit_var, false)) { @@ -1451,6 +1451,22 @@ initialize_env (void) &gomp_bind_var_list_len) && gomp_global_icv.bind_var == omp_proc_bind_false) ignore = true; + if (parse_unsigned_long ("OMP_MAX_ACTIVE_LEVELS", + &max_active_levels_var, true)) + gomp_global_icv.max_active_levels_var + = (max_active_levels_var > gomp_supported_active_levels) + ? gomp_supported_active_levels : max_active_levels_var; + else + { + bool nested = true; + + /* OMP_NESTED is deprecated in OpenMP 5.0. */ + if (parse_boolean ("OMP_NESTED", &nested)) + gomp_global_icv.max_active_levels_var + = nested ? gomp_supported_active_levels : 1; + else if (gomp_nthreads_var_list_len > 1 || gomp_bind_var_list_len > 1) + gomp_global_icv.max_active_levels_var = gomp_supported_active_levels; + } /* Make sure OMP_PLACES and GOMP_CPU_AFFINITY env vars are always parsed if present in the environment. If OMP_PROC_BIND was set explicitly to false, don't populate places list though. If places diff --git a/libgomp/fortran.c b/libgomp/fortran.c index 029dec1..cd719f9 100644 --- a/libgomp/fortran.c +++ b/libgomp/fortran.c @@ -47,10 +47,13 @@ ialias_redirect (omp_test_lock) ialias_redirect (omp_test_nest_lock) # endif ialias_redirect (omp_set_dynamic) -ialias_redirect (omp_set_nested) -ialias_redirect (omp_set_num_threads) ialias_redirect (omp_get_dynamic) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +ialias_redirect (omp_set_nested) ialias_redirect (omp_get_nested) +#pragma GCC diagnostic pop +ialias_redirect (omp_set_num_threads) ialias_redirect (omp_in_parallel) ialias_redirect (omp_get_max_threads) ialias_redirect (omp_get_num_procs) @@ -281,6 +284,8 @@ omp_set_dynamic_8_ (const int64_t *set) omp_set_dynamic (!!*set); } +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" void omp_set_nested_ (const int32_t *set) { @@ -292,6 +297,7 @@ omp_set_nested_8_ (const int64_t *set) { omp_set_nested (!!*set); } +#pragma GCC diagnostic pop void omp_set_num_threads_ (const int32_t *set) @@ -311,11 +317,14 @@ omp_get_dynamic_ (void) return omp_get_dynamic (); } +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" int32_t omp_get_nested_ (void) { return omp_get_nested (); } +#pragma GCC diagnostic pop int32_t omp_in_parallel_ (void) diff --git a/libgomp/icv.c b/libgomp/icv.c index 4da6527..c0c0305 100644 --- a/libgomp/icv.c +++ b/libgomp/icv.c @@ -51,19 +51,26 @@ omp_get_dynamic (void) return icv->dyn_var; } +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" void omp_set_nested (int val) { struct gomp_task_icv *icv = gomp_icv (true); - icv->nest_var = val; + if (val) + icv->max_active_levels_var = gomp_supported_active_levels; + else if (icv->max_active_levels_var > 1) + icv->max_active_levels_var = 1; } int omp_get_nested (void) { struct gomp_task_icv *icv = gomp_icv (false); - return icv->nest_var; + return (icv->max_active_levels_var > 1 + && icv->max_active_levels_var > omp_get_active_level ()); } +#pragma GCC diagnostic pop void omp_set_schedule (omp_sched_t kind, int chunk_size) @@ -117,17 +124,20 @@ omp_set_max_active_levels (int max_levels) { if (max_levels >= 0) { + struct gomp_task_icv *icv = gomp_icv (true); + if (max_levels <= gomp_supported_active_levels) - gomp_max_active_levels_var = max_levels; + icv->max_active_levels_var = max_levels; else - gomp_max_active_levels_var = gomp_supported_active_levels; + icv->max_active_levels_var = gomp_supported_active_levels; } } int omp_get_max_active_levels (void) { - return gomp_max_active_levels_var; + struct gomp_task_icv *icv = gomp_icv (false); + return icv->max_active_levels_var; } int @@ -222,10 +232,13 @@ omp_get_default_allocator (void) } ialias (omp_set_dynamic) -ialias (omp_set_nested) -ialias (omp_set_num_threads) ialias (omp_get_dynamic) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +ialias (omp_set_nested) ialias (omp_get_nested) +#pragma GCC diagnostic pop +ialias (omp_set_num_threads) ialias (omp_set_schedule) ialias (omp_get_schedule) ialias (omp_get_max_threads) diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index da7ac03..070d29c 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -428,7 +428,7 @@ struct gomp_task_icv int default_device_var; unsigned int thread_limit_var; bool dyn_var; - bool nest_var; + unsigned char max_active_levels_var; char bind_var; /* Internal ICV. */ struct target_mem_desc *target_data; @@ -441,13 +441,12 @@ enum gomp_target_offload_t GOMP_TARGET_OFFLOAD_DISABLED }; -#define gomp_supported_active_levels INT_MAX +#define gomp_supported_active_levels UCHAR_MAX extern struct gomp_task_icv gomp_global_icv; #ifndef HAVE_SYNC_BUILTINS extern gomp_mutex_t gomp_managed_threads_lock; #endif -extern unsigned long gomp_max_active_levels_var; extern bool gomp_cancel_var; extern enum gomp_target_offload_t gomp_target_offload_var; extern int gomp_max_task_priority_var; @@ -1162,10 +1161,10 @@ struct gomp_device_descr /* Kind of the pragma, for which gomp_map_vars () is called. */ enum gomp_map_vars_kind { - GOMP_MAP_VARS_OPENACC, - GOMP_MAP_VARS_TARGET, - GOMP_MAP_VARS_DATA, - GOMP_MAP_VARS_ENTER_DATA + GOMP_MAP_VARS_OPENACC = 1, + GOMP_MAP_VARS_TARGET = 2, + GOMP_MAP_VARS_DATA = 4, + GOMP_MAP_VARS_ENTER_DATA = 8 }; extern void gomp_acc_declare_allocate (bool, size_t, void **, size_t *, diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map index c5f52f7..2c95f78 100644 --- a/libgomp/libgomp.map +++ b/libgomp/libgomp.map @@ -359,6 +359,12 @@ GOMP_5.0 { GOMP_workshare_task_reduction_unregister; } GOMP_4.5; +GOMP_5.0.1 { + global: + GOMP_alloc; + GOMP_free; +} GOMP_5.0; + OACC_2.0 { global: acc_get_num_devices; diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi index 6937063..9e486c7 100644 --- a/libgomp/libgomp.texi +++ b/libgomp/libgomp.texi @@ -68,7 +68,7 @@ Boston, MA 02110-1301, USA@* @page -@node Top +@node Top, Enabling OpenMP @top Introduction @cindex Introduction @@ -487,10 +487,20 @@ This function returns @code{true} if nested parallel regions are enabled, @code{false} otherwise. Here, @code{true} and @code{false} represent their language-specific counterparts. -Nested parallel regions may be initialized at startup by the -@env{OMP_NESTED} environment variable or at runtime using -@code{omp_set_nested}. If undefined, nested parallel regions are -disabled by default. +The state of nested parallel regions at startup depends on several +environment variables. If @env{OMP_MAX_ACTIVE_LEVELS} is defined +and is set to greater than one, then nested parallel regions will be +enabled. If not defined, then the value of the @env{OMP_NESTED} +environment variable will be followed if defined. If neither are +defined, then if either @env{OMP_NUM_THREADS} or @env{OMP_PROC_BIND} +are defined with a list of more than one value, then nested parallel +regions are enabled. If none of these are defined, then nested parallel +regions are disabled by default. + +Nested parallel regions can be enabled or disabled at runtime using +@code{omp_set_nested}, or by setting the maximum number of nested +regions with @code{omp_set_max_active_levels} to one to disable, or +above one to enable. @item @emph{C/C++}: @multitable @columnfractions .20 .80 @@ -503,7 +513,8 @@ disabled by default. @end multitable @item @emph{See also}: -@ref{omp_set_nested}, @ref{OMP_NESTED} +@ref{omp_set_max_active_levels}, @ref{omp_set_nested}, +@ref{OMP_MAX_ACTIVE_LEVELS}, @ref{OMP_NESTED} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.11. @@ -964,6 +975,10 @@ are allowed to create new teams. The function takes the language-specific equivalent of @code{true} and @code{false}, where @code{true} enables dynamic adjustment of team sizes and @code{false} disables it. +Enabling nested parallel regions will also set the maximum number of +active nested regions to the maximum supported. Disabling nested parallel +regions will set the maximum number of active nested regions to one. + @item @emph{C/C++}: @multitable @columnfractions .20 .80 @item @emph{Prototype}: @tab @code{void omp_set_nested(int nested);} @@ -976,7 +991,8 @@ dynamic adjustment of team sizes and @code{false} disables it. @end multitable @item @emph{See also}: -@ref{OMP_NESTED}, @ref{omp_get_nested} +@ref{omp_get_nested}, @ref{omp_set_max_active_levels}, +@ref{OMP_MAX_ACTIVE_LEVELS}, @ref{OMP_NESTED} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.10. @@ -1502,10 +1518,14 @@ disabled by default. @item @emph{Description}: Specifies the initial value for the maximum number of nested parallel regions. The value of this variable shall be a positive integer. -If undefined, the number of active levels is unlimited. +If undefined, then if @env{OMP_NESTED} is defined and set to true, or +if @env{OMP_NUM_THREADS} or @env{OMP_PROC_BIND} are defined and set to +a list with more than one item, the maximum number of nested parallel +regions will be initialized to the largest number supported, otherwise +it will be set to one. @item @emph{See also}: -@ref{omp_set_max_active_levels} +@ref{omp_set_max_active_levels}, @ref{OMP_NESTED} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.9 @@ -1541,11 +1561,16 @@ integer, and zero is allowed. If undefined, the default priority is @item @emph{Description}: Enable or disable nested parallel regions, i.e., whether team members are allowed to create new teams. The value of this environment variable -shall be @code{TRUE} or @code{FALSE}. If undefined, nested parallel -regions are disabled by default. +shall be @code{TRUE} or @code{FALSE}. If set to @code{TRUE}, the number +of maximum active nested regions supported will by default be set to the +maximum supported, otherwise it will be set to one. If +@env{OMP_MAX_ACTIVE_LEVELS} is defined, its setting will override this +setting. If both are undefined, nested parallel regions are enabled if +@env{OMP_NUM_THREADS} or @env{OMP_PROC_BINDS} are defined to a list with +more than one item, otherwise they are disabled by default. @item @emph{See also}: -@ref{omp_set_nested} +@ref{omp_set_max_active_levels}, @ref{omp_set_nested} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.6 @@ -1561,11 +1586,12 @@ regions are disabled by default. @item @emph{Description}: Specifies the default number of threads to use in parallel regions. The value of this variable shall be a comma-separated list of positive integers; -the value specified the number of threads to use for the corresponding nested -level. If undefined one thread per CPU is used. +the value specifies the number of threads to use for the corresponding nested +level. Specifying more than one item in the list will automatically enable +nesting by default. If undefined one thread per CPU is used. @item @emph{See also}: -@ref{omp_set_num_threads} +@ref{omp_set_num_threads}, @ref{OMP_NESTED} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.2 @@ -1586,13 +1612,15 @@ the thread affinity policy for the corresponding nesting level. With @code{MASTER} the worker threads are in the same place partition as the master thread. With @code{CLOSE} those are kept close to the master thread in contiguous place partitions. And with @code{SPREAD} a sparse distribution -across the place partitions is used. +across the place partitions is used. Specifying more than one item in the +list will automatically enable nesting by default. When undefined, @env{OMP_PROC_BIND} defaults to @code{TRUE} when @env{OMP_PLACES} or @env{GOMP_CPU_AFFINITY} is set and @code{FALSE} otherwise. @item @emph{See also}: -@ref{OMP_PLACES}, @ref{GOMP_CPU_AFFINITY}, @ref{omp_get_proc_bind} +@ref{omp_get_proc_bind}, @ref{GOMP_CPU_AFFINITY}, +@ref{OMP_NESTED}, @ref{OMP_PLACES} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 4.4 diff --git a/libgomp/libgomp_g.h b/libgomp/libgomp_g.h index 59e3697..b20e186 100644 --- a/libgomp/libgomp_g.h +++ b/libgomp/libgomp_g.h @@ -357,6 +357,11 @@ extern void GOMP_teams (unsigned int, unsigned int); extern void GOMP_teams_reg (void (*) (void *), void *, unsigned, unsigned, unsigned); +/* allocator.c */ + +extern void *GOMP_alloc (size_t, size_t, uintptr_t); +extern void GOMP_free (void *, uintptr_t); + /* oacc-async.c */ extern void GOACC_wait (int, int, ...); diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c index 65757ab..4c8f0e0 100644 --- a/libgomp/oacc-mem.c +++ b/libgomp/oacc-mem.c @@ -403,7 +403,8 @@ acc_map_data (void *h, void *d, size_t s) struct target_mem_desc *tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes, - &kinds, true, GOMP_MAP_VARS_ENTER_DATA); + &kinds, true, + GOMP_MAP_VARS_OPENACC | GOMP_MAP_VARS_ENTER_DATA); assert (tgt); assert (tgt->list_count == 1); splay_tree_key n = tgt->list[0].key; @@ -572,7 +573,8 @@ goacc_enter_datum (void **hostaddrs, size_t *sizes, void *kinds, int async) struct target_mem_desc *tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, - kinds, true, GOMP_MAP_VARS_ENTER_DATA); + kinds, true, (GOMP_MAP_VARS_OPENACC + | GOMP_MAP_VARS_ENTER_DATA)); assert (tgt); assert (tgt->list_count == 1); n = tgt->list[0].key; @@ -1202,7 +1204,8 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, struct target_mem_desc *tgt = gomp_map_vars_async (acc_dev, aq, groupnum, &hostaddrs[i], NULL, &sizes[i], &kinds[i], true, - GOMP_MAP_VARS_ENTER_DATA); + (GOMP_MAP_VARS_OPENACC + | GOMP_MAP_VARS_ENTER_DATA)); assert (tgt); gomp_mutex_lock (&acc_dev->lock); diff --git a/libgomp/omp.h.in b/libgomp/omp.h.in index a9e6c44..4424a16 100644 --- a/libgomp/omp.h.in +++ b/libgomp/omp.h.in @@ -26,6 +26,12 @@ #ifndef _OMP_H #define _OMP_H 1 +#if defined(__GNUC__) && _OPENMP >= 201811 +# define __GOMP_DEPRECATED_5_0 __attribute__((__deprecated__)) +#else +# define __GOMP_DEPRECATED_5_0 +#endif + #ifndef _LIBGOMP_OMP_LOCK_DEFINED #define _LIBGOMP_OMP_LOCK_DEFINED 1 /* These two structures get edited by the libgomp build process to @@ -66,18 +72,19 @@ typedef enum omp_proc_bind_t typedef enum omp_sync_hint_t { omp_sync_hint_none = 0, - omp_lock_hint_none = omp_sync_hint_none, + omp_lock_hint_none __GOMP_DEPRECATED_5_0 = omp_sync_hint_none, omp_sync_hint_uncontended = 1, - omp_lock_hint_uncontended = omp_sync_hint_uncontended, + omp_lock_hint_uncontended __GOMP_DEPRECATED_5_0 = omp_sync_hint_uncontended, omp_sync_hint_contended = 2, - omp_lock_hint_contended = omp_sync_hint_contended, + omp_lock_hint_contended __GOMP_DEPRECATED_5_0 = omp_sync_hint_contended, omp_sync_hint_nonspeculative = 4, - omp_lock_hint_nonspeculative = omp_sync_hint_nonspeculative, + omp_lock_hint_nonspeculative __GOMP_DEPRECATED_5_0 + = omp_sync_hint_nonspeculative, omp_sync_hint_speculative = 8, - omp_lock_hint_speculative = omp_sync_hint_speculative + omp_lock_hint_speculative __GOMP_DEPRECATED_5_0 = omp_sync_hint_speculative } omp_sync_hint_t; -typedef omp_sync_hint_t omp_lock_hint_t; +typedef __GOMP_DEPRECATED_5_0 omp_sync_hint_t omp_lock_hint_t; typedef struct __attribute__((__aligned__ (sizeof (void *)))) omp_depend_t { @@ -184,8 +191,8 @@ extern int omp_in_parallel (void) __GOMP_NOTHROW; extern void omp_set_dynamic (int) __GOMP_NOTHROW; extern int omp_get_dynamic (void) __GOMP_NOTHROW; -extern void omp_set_nested (int) __GOMP_NOTHROW; -extern int omp_get_nested (void) __GOMP_NOTHROW; +extern void omp_set_nested (int) __GOMP_NOTHROW __GOMP_DEPRECATED_5_0; +extern int omp_get_nested (void) __GOMP_NOTHROW __GOMP_DEPRECATED_5_0; extern void omp_init_lock (omp_lock_t *) __GOMP_NOTHROW; extern void omp_init_lock_with_hint (omp_lock_t *, omp_sync_hint_t) @@ -274,7 +281,7 @@ extern void omp_set_default_allocator (omp_allocator_handle_t) __GOMP_NOTHROW; extern omp_allocator_handle_t omp_get_default_allocator (void) __GOMP_NOTHROW; extern void *omp_alloc (__SIZE_TYPE__, omp_allocator_handle_t __GOMP_DEFAULT_NULL_ALLOCATOR) - __GOMP_NOTHROW; + __GOMP_NOTHROW __attribute__((__malloc__, __alloc_size__ (1))); extern void omp_free (void *, omp_allocator_handle_t __GOMP_DEFAULT_NULL_ALLOCATOR) __GOMP_NOTHROW; diff --git a/libgomp/omp_lib.f90.in b/libgomp/omp_lib.f90.in index 2fae57b..3b7f0cb 100644 --- a/libgomp/omp_lib.f90.in +++ b/libgomp/omp_lib.f90.in @@ -644,4 +644,8 @@ end function end interface +#if _OPENMP >= 201811 +!GCC$ ATTRIBUTES DEPRECATED :: omp_get_nested, omp_set_nested +#endif + end module omp_lib diff --git a/libgomp/parallel.c b/libgomp/parallel.c index 2fe4f573..ebce492 100644 --- a/libgomp/parallel.c +++ b/libgomp/parallel.c @@ -53,11 +53,11 @@ gomp_resolve_num_threads (unsigned specified, unsigned count) /* Accelerators with fixed thread counts require this to return 1 for nested parallel regions. */ #if !defined(__AMDGCN__) && !defined(__nvptx__) - && !icv->nest_var + && icv->max_active_levels_var <= 1 #endif ) return 1; - else if (thr->ts.active_level >= gomp_max_active_levels_var) + else if (thr->ts.active_level >= icv->max_active_levels_var) return 1; /* If NUM_THREADS not specified, use nthreads_var. */ diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c index 0be350b..e53c6de 100644 --- a/libgomp/plugin/plugin-gcn.c +++ b/libgomp/plugin/plugin-gcn.c @@ -37,6 +37,7 @@ #include <stdbool.h> #include <limits.h> #include <hsa.h> +#include <hsa_ext_amd.h> #include <dlfcn.h> #include <signal.h> #include "libgomp-plugin.h" @@ -46,12 +47,8 @@ #include "oacc-int.h" #include <assert.h> -/* Additional definitions not in HSA 1.1. - FIXME: this needs to be updated in hsa.h for upstream, but the only source - right now is the ROCr source which may cause license issues. */ -#define HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT 0xA002 - /* These probably won't be in elf.h for a while. */ +#ifndef R_AMDGPU_NONE #define R_AMDGPU_NONE 0 #define R_AMDGPU_ABS32_LO 1 /* (S + A) & 0xFFFFFFFF */ #define R_AMDGPU_ABS32_HI 2 /* (S + A) >> 32 */ @@ -64,8 +61,8 @@ #define R_AMDGPU_GOTPCREL32_HI 9 /* (G + GOT + A - P) >> 32 */ #define R_AMDGPU_REL32_LO 10 /* (S + A - P) & 0xFFFFFFFF */ #define R_AMDGPU_REL32_HI 11 /* (S + A - P) >> 32 */ -#define reserved 12 #define R_AMDGPU_RELATIVE64 13 /* B + A */ +#endif /* GCN specific definitions for asynchronous queues. */ diff --git a/libgomp/target.c b/libgomp/target.c index 1a8c67c..6152f58 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -683,7 +683,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum); tgt->list_count = mapnum; - tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1; + tgt->refcount = (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) ? 0 : 1; tgt->device_descr = devicep; tgt->prev = NULL; struct gomp_coalesce_buf cbuf, *cbufp = NULL; @@ -1020,7 +1020,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, if (not_found_cnt) tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array)); splay_tree_node array = tgt->array; - size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0; + size_t j, field_tgt_offset = 0, field_tgt_clear = FIELD_TGT_EMPTY; uintptr_t field_tgt_base = 0; for (i = 0; i < mapnum; i++) @@ -1212,15 +1212,16 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference counts ('n->refcount', 'n->dynamic_refcount'). */ + + gomp_attach_pointer (devicep, aq, mem_map, n, + (uintptr_t) hostaddrs[i], sizes[i], + cbufp); } - else + else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0) { gomp_mutex_unlock (&devicep->lock); gomp_fatal ("outer struct not mapped for attach"); } - gomp_attach_pointer (devicep, aq, mem_map, n, - (uintptr_t) hostaddrs[i], sizes[i], - cbufp); continue; } default: @@ -1415,7 +1416,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, /* If the variable from "omp target enter data" map-list was already mapped, tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or gomp_exit_data. */ - if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0) + if ((pragma_kind & GOMP_MAP_VARS_ENTER_DATA) && tgt->refcount == 0) { free (tgt); tgt = NULL; @@ -2476,6 +2477,19 @@ gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum, } for (i = 0; i < mapnum; i++) + if ((kinds[i] & typemask) == GOMP_MAP_DETACH) + { + struct splay_tree_key_s cur_node; + cur_node.host_start = (uintptr_t) hostaddrs[i]; + cur_node.host_end = cur_node.host_start + sizeof (void *); + splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node); + + if (n) + gomp_detach_pointer (devicep, NULL, n, (uintptr_t) hostaddrs[i], + false, NULL); + } + + for (i = 0; i < mapnum; i++) { struct splay_tree_key_s cur_node; unsigned char kind = kinds[i] & typemask; @@ -2512,7 +2526,9 @@ gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum, cur_node.host_end - cur_node.host_start); if (k->refcount == 0) gomp_remove_var (devicep, k); + break; + case GOMP_MAP_DETACH: break; default: gomp_mutex_unlock (&devicep->lock); @@ -2621,6 +2637,14 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs, &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA); i += j - i - 1; } + else if (i + 1 < mapnum && (kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH) + { + /* An attach operation must be processed together with the mapped + base-pointer list item. */ + gomp_map_vars (devicep, 2, &hostaddrs[i], NULL, &sizes[i], &kinds[i], + true, GOMP_MAP_VARS_ENTER_DATA); + i += 1; + } else gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA); diff --git a/libgomp/testsuite/lib/libgomp.exp b/libgomp/testsuite/lib/libgomp.exp index 5d86e2a..72d0011 100644 --- a/libgomp/testsuite/lib/libgomp.exp +++ b/libgomp/testsuite/lib/libgomp.exp @@ -241,6 +241,12 @@ proc libgomp_init { args } { if { $offload_additional_options != "" } { lappend ALWAYS_CFLAGS "additional_flags=${offload_additional_options}" } + + # Tell warning from error diagnostics. This fits for C, C++, and Fortran. + global gcc_warning_prefix + set gcc_warning_prefix "\[Ww\]arning:" + global gcc_error_prefix + set gcc_error_prefix "(\[Ff\]atal )?\[Ee\]rror:" } # diff --git a/libgomp/testsuite/libgomp.c++/affinity-1.C b/libgomp/testsuite/libgomp.c++/affinity-1.C index d20b392..eff2316 100644 --- a/libgomp/testsuite/libgomp.c++/affinity-1.C +++ b/libgomp/testsuite/libgomp.c++/affinity-1.C @@ -1,4 +1,5 @@ // { dg-do run } // { dg-set-target-env-var OMP_PROC_BIND "true" } +// { dg-additional-options "-Wno-deprecated-declarations" } #include "../libgomp.c/affinity-1.c" diff --git a/libgomp/testsuite/libgomp.c++/allocate-1.C b/libgomp/testsuite/libgomp.c++/allocate-1.C new file mode 100644 index 0000000..0876719 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/allocate-1.C @@ -0,0 +1,207 @@ +#include <omp.h> +#include <stdlib.h> +#include <stdint.h> + +struct S { int a, b; }; + +void +foo (int &x, int &y, int &r, int &l, int (&l2)[4], int &l3, int &n, int *&p, + int *&q, int &px, struct S &s, omp_allocator_handle_t h, int fl) +{ + int i; + typedef int T[x]; + T v, w; + T &v2 = v; + T &w2 = w; + int r1[4] = { 0, 0, 0, 0 }; + int (&r2)[4] = r1; + int xo = x; + for (i = 0; i < x; i++) + w[i] = i; + for (i = 0; i < 4; i++) + p[i] = 0; + for (i = 0; i < 3; i++) + q[i] = 0; + #pragma omp parallel private (y, v2) firstprivate (x) allocate (x, y, v2) + { + int *volatile p1 = &x; + int *volatile p2 = &y; + if (x != 42) + abort (); + #pragma omp barrier + *p2 = 1; + p1[0]++; + v2[0] = 7; + v2[41] = 8; + #pragma omp barrier + if (x != 43 || y != 1) + abort (); + if (v2[0] != 7 || v2[41] != 8) + abort (); + if ((fl & 2) && (((uintptr_t) p1 | (uintptr_t) p2 + | (uintptr_t) &v2[0]) & 63) != 0) + abort (); + } + x = xo; + #pragma omp teams + #pragma omp parallel private (y) firstprivate (x, w2) allocate (h: x, y, w2) + { + int *volatile p1 = &x; + int *volatile p2 = &y; + if (x != 42 || w2[17] != 17 || w2[41] != 41) + abort (); + #pragma omp barrier + *p2 = 1; + p1[0]++; + w2[19]++; + #pragma omp barrier + if (x != 43 || y != 1 || w2[19] != 20) + abort (); + if ((fl & 1) && (((uintptr_t) p1 | (uintptr_t) p2 + | (uintptr_t) &w2[0]) & 63) != 0) + abort (); + } + x = xo; + #pragma omp parallel for private (y) firstprivate (x) allocate (h: x, y, r, l, n) reduction(+: r) lastprivate (l) linear (n: 16) + for (i = 0; i < 64; i++) + { + if (x != 42) + abort (); + y = 1; + l = i; + n += y + 15; + r += i; + if ((fl & 1) && (((uintptr_t) &x | (uintptr_t) &y | (uintptr_t) &r + | (uintptr_t) &l | (uintptr_t) &n) & 63) != 0) + abort (); + } + x = xo; + #pragma omp parallel + { + #pragma omp for lastprivate (l2) allocate (h: l2, l3) lastprivate (conditional: l3) + for (i = 0; i < 64; i++) + { + l2[0] = i; + l2[1] = i + 1; + l2[2] = i + 2; + l2[3] = i + 3; + if (i < 37) + l3 = i; + if ((fl & 1) && (((uintptr_t) &l2[0] | (uintptr_t) &l3) & 63) != 0) + abort (); + } + #pragma omp for reduction(+:p[2:px], q[:3], r2) allocate(h: p, q, r2) + for (i = 0; i < 32; i++) + { + p[2] += i; + p[3] += 2 * i; + q[0] += 3 * i; + q[2] += 4 * i; + r2[0] += 5 * i; + r2[3] += 6 * i; + /* Can't really rely on alignment of &p[0], the implementation could + allocate the whole array or do what GCC does and allocate only part + of it. */ + if ((fl & 1) && (((uintptr_t) &q[0] | (uintptr_t) &r2[0]) & 63) != 0) + abort (); + } + #pragma omp task private(y) firstprivate(x) allocate(x, y) + { + int *volatile p1 = &x; + int *volatile p2 = &y; + if (x != 42) + abort (); + p1[0]++; + p2[0] = 21; + if (x != 43 || y != 21) + abort (); + if ((fl & 2) && (((uintptr_t) p1 | (uintptr_t) p2) & 63) != 0) + abort (); + } + #pragma omp task private(y) firstprivate(x) allocate(h: x, y) + { + int *volatile p1 = &x; + int *volatile p2 = &y; + if (x != 42) + abort (); + p1[0]++; + p2[0] = 21; + if (x != 43 || y != 21) + abort (); + if ((fl & 1) && (((uintptr_t) p1 | (uintptr_t) p2) & 63) != 0) + abort (); + } + #pragma omp task private(y) firstprivate(s) allocate(s, y) + { + int *volatile p1 = &s.a; + int *volatile p2 = &s.b; + int *volatile p3 = &y; + if (s.a != 27 || s.b != 29) + abort (); + p1[0]++; + p2[0]++; + p3[0] = 21; + if (s.a != 28 || s.b != 30 || y != 21) + abort (); + if ((fl & 2) && (((uintptr_t) p1 | (uintptr_t) p3) & 63) != 0) + abort (); + } + #pragma omp task private(y) firstprivate(s) allocate(h: s, y) + { + int *volatile p1 = &s.a; + int *volatile p2 = &s.b; + int *volatile p3 = &y; + if (s.a != 27 || s.b != 29) + abort (); + p1[0]++; + p2[0]++; + p3[0] = 21; + if (s.a != 28 || s.b != 30 || y != 21) + abort (); + if ((fl & 1) && (((uintptr_t) p1 | (uintptr_t) p3) & 63) != 0) + abort (); + } + } + if (r != 64 * 63 / 2 || l != 63 || n != 8 + 16 * 64) + abort (); + if (l2[0] != 63 || l2[1] != 63 + 1 || l2[2] != 63 + 2 || l2[3] != 63 + 3 || l3 != 36) + abort (); + if (p[2] != (32 * 31) / 2 || p[3] != 2 * (32 * 31) / 2 + || q[0] != 3 * (32 * 31) / 2 || q[2] != 4 * (32 * 31) / 2 + || r2[0] != 5 * (32 * 31) / 2 || r2[3] != 6 * (32 * 31) / 2) + abort (); +} + +int +main () +{ + omp_alloctrait_t traits[3] + = { { omp_atk_alignment, 64 }, + { omp_atk_fallback, omp_atv_null_fb } }; + omp_allocator_handle_t a + = omp_init_allocator (omp_default_mem_space, 2, traits); + if (a == omp_null_allocator) + abort (); + omp_set_default_allocator (omp_default_mem_alloc); + struct S s = { 27, 29 }; + int p1[4], q1[3], px = 2; + int *p = p1; + int *q = q1; + int x = 42, y = 0, r = 0, l, l2[4], l3, n = 8; + foo (x, y, r, l, l2, l3, n, p, q, px, s, omp_null_allocator, 0); + x = 42; y = 0; r = 0; l = -1; l2[0] = -1; l2[1] = -1; + l2[2] = -1; l2[3] = -1; n = 8; + foo (x, y, r, l, l2, l3, n, p, q, px, s, omp_default_mem_alloc, 0); + x = 42; y = 0; r = 0; l = -1; l2[0] = -1; l2[1] = -1; + l2[2] = -1; l2[3] = -1; n = 8; + foo (x, y, r, l, l2, l3, n, p, q, px, s, a, 1); + x = 42; y = 0; r = 0; l = -1; l2[0] = -1; l2[1] = -1; + l2[2] = -1; l2[3] = -1; n = 8; + omp_set_default_allocator (a); + foo (x, y, r, l, l2, l3, n, p, q, px, s, omp_null_allocator, 3); + x = 42; y = 0; r = 0; l = -1; l2[0] = -1; l2[1] = -1; + l2[2] = -1; l2[3] = -1; n = 8; + foo (x, y, r, l, l2, l3, n, p, q, px, s, omp_default_mem_alloc, 2); + omp_destroy_allocator (a); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/allocate-1.c b/libgomp/testsuite/libgomp.c-c++-common/allocate-1.c new file mode 100644 index 0000000..4398ff9 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/allocate-1.c @@ -0,0 +1,375 @@ +#include <omp.h> +#include <stdlib.h> +#include <stdint.h> + +struct S { int a, b; }; + +void +foo (int x, int *p, int *q, int px, omp_allocator_handle_t h, int fl) +{ + int y = 0, r = 0, i, i1, l, l2[4], l3, n = 8; + int i2, j2, n2 = 9, l4; + int i3, j3, n3 = 10, l5; + int i4, j4, n4 = 11, l6; + int i5; + int v[x], w[x]; + int r2[4] = { 0, 0, 0, 0 }; + int xo = x; + struct S s = { 27, 29 }; + for (i = 0; i < 4; i++) + p[i] = 0; + for (i = 0; i < 3; i++) + q[i] = 0; + for (i = 0; i < x; i++) + w[i] = i; + #pragma omp parallel private (y, v) firstprivate (x) allocate (x, y, v) + { + int *volatile p1 = &x; + int *volatile p2 = &y; + if (x != 42) + abort (); + #pragma omp barrier + *p2 = 1; + p1[0]++; + v[0] = 7; + v[41] = 8; + #pragma omp barrier + if (x != 43 || y != 1) + abort (); + if (v[0] != 7 || v[41] != 8) + abort (); + if ((fl & 2) && (((uintptr_t) p1 | (uintptr_t) p2 + | (uintptr_t) &v[0]) & 63) != 0) + abort (); + } + x = xo; + #pragma omp teams + #pragma omp parallel private (y) firstprivate (x, w) allocate (h: x, y, w) + { + int *volatile p1 = &x; + int *volatile p2 = &y; + if (x != 42 || w[17] != 17 || w[41] != 41) + abort (); + #pragma omp barrier + *p2 = 1; + p1[0]++; + w[19]++; + #pragma omp barrier + if (x != 43 || y != 1 || w[19] != 20) + abort (); + if ((fl & 1) && (((uintptr_t) p1 | (uintptr_t) p2 + | (uintptr_t) &w[0]) & 63) != 0) + abort (); + } + x = xo; + #pragma omp parallel for private (y) firstprivate (x) allocate (h: x, y, r, l, n) reduction(+: r) lastprivate (l) linear (n: 16) + for (i = 0; i < 64; i++) + { + if (x != 42) + abort (); + y = 1; + l = i; + n += y + 15; + r += i; + if ((fl & 1) && (((uintptr_t) &x | (uintptr_t) &y | (uintptr_t) &r + | (uintptr_t) &l | (uintptr_t) &n) & 63) != 0) + abort (); + } + x = xo; + #pragma omp parallel + { + #pragma omp for lastprivate (l2) private (i1) allocate (h: l2, l3, i1) lastprivate (conditional: l3) + for (i1 = 0; i1 < 64; i1++) + { + l2[0] = i1; + l2[1] = i1 + 1; + l2[2] = i1 + 2; + l2[3] = i1 + 3; + if (i1 < 37) + l3 = i1; + if ((fl & 1) && (((uintptr_t) &l2[0] | (uintptr_t) &l3 | (uintptr_t) &i1) & 63) != 0) + abort (); + } + #pragma omp for collapse(2) lastprivate(l4, i2, j2) linear (n2:17) allocate (h: n2, l4, i2, j2) + for (i2 = 3; i2 < 5; i2++) + for (j2 = 17; j2 < 22; j2 += 2) + { + n2 += 17; + l4 = i2 * 31 + j2; + if ((fl & 1) && (((uintptr_t) &l4 | (uintptr_t) &n2 + | (uintptr_t) &i2 | (uintptr_t) &j2) & 63) != 0) + abort (); + } + #pragma omp for collapse(2) lastprivate(l5, i3, j3) linear (n3:17) schedule (static, 3) allocate (n3, l5, i3, j3) + for (i3 = 3; i3 < 5; i3++) + for (j3 = 17; j3 < 23; j3 += 2) + { + n3 += 17; + l5 = i3 * 31 + j3; + if ((fl & 2) && (((uintptr_t) &l5 | (uintptr_t) &n3 + | (uintptr_t) &i3 | (uintptr_t) &j3) & 63) != 0) + abort (); + } + #pragma omp for collapse(2) lastprivate(l6, i4, j4) linear (n4:17) schedule (dynamic) allocate (h: n4, l6, i4, j4) + for (i4 = 3; i4 < 5; i4++) + for (j4 = 17; j4 < 22; j4 += 2) + { + n4 += 17; + l6 = i4 * 31 + j4; + if ((fl & 1) && (((uintptr_t) &l6 | (uintptr_t) &n4 + | (uintptr_t) &i4 | (uintptr_t) &j4) & 63) != 0) + abort (); + } + #pragma omp for lastprivate (i5) allocate (i5) + for (i5 = 1; i5 < 17; i5 += 3) + { + if ((fl & 2) && (((uintptr_t) &i5) & 63) != 0) + abort (); + } + #pragma omp for reduction(+:p[2:px], q[:3], r2) allocate(h: p, q, r2) + for (i = 0; i < 32; i++) + { + p[2] += i; + p[3] += 2 * i; + q[0] += 3 * i; + q[2] += 4 * i; + r2[0] += 5 * i; + r2[3] += 6 * i; + /* Can't really rely on alignment of &p[0], the implementation could + allocate the whole array or do what GCC does and allocate only part + of it. */ + if ((fl & 1) && (((uintptr_t) &q[0] | (uintptr_t) &r2[0]) & 63) != 0) + abort (); + } + #pragma omp task private(y) firstprivate(x) allocate(x, y) + { + int *volatile p1 = &x; + int *volatile p2 = &y; + if (x != 42) + abort (); + p1[0]++; + p2[0] = 21; + if (x != 43 || y != 21) + abort (); + if ((fl & 2) && (((uintptr_t) p1 | (uintptr_t) p2) & 63) != 0) + abort (); + } + #pragma omp task private(y) firstprivate(x) allocate(h: x, y) + { + int *volatile p1 = &x; + int *volatile p2 = &y; + if (x != 42) + abort (); + p1[0]++; + p2[0] = 21; + if (x != 43 || y != 21) + abort (); + if ((fl & 1) && (((uintptr_t) p1 | (uintptr_t) p2) & 63) != 0) + abort (); + } + #pragma omp task private(y) firstprivate(s) allocate(s, y) + { + int *volatile p1 = &s.a; + int *volatile p2 = &s.b; + int *volatile p3 = &y; + if (s.a != 27 || s.b != 29) + abort (); + p1[0]++; + p2[0]++; + p3[0] = 21; + if (s.a != 28 || s.b != 30 || y != 21) + abort (); + if ((fl & 2) && (((uintptr_t) p1 | (uintptr_t) p3) & 63) != 0) + abort (); + } + #pragma omp task private(y) firstprivate(s) allocate(h: s, y) + { + int *volatile p1 = &s.a; + int *volatile p2 = &s.b; + int *volatile p3 = &y; + if (s.a != 27 || s.b != 29) + abort (); + p1[0]++; + p2[0]++; + p3[0] = 21; + if (s.a != 28 || s.b != 30 || y != 21) + abort (); + if ((fl & 1) && (((uintptr_t) p1 | (uintptr_t) p3) & 63) != 0) + abort (); + } + } + if (r != 64 * 63 / 2 || l != 63 || n != 8 + 16 * 64) + abort (); + if (l2[0] != 63 || l2[1] != 63 + 1 || l2[2] != 63 + 2 || l2[3] != 63 + 3 || l3 != 36) + abort (); + if (i2 != 5 || j2 != 23 || n2 != 9 + 6 * 17 || l4 != 4 * 31 + 21) + abort (); + if (i3 != 5 || j3 != 23 || n3 != 10 + 6 * 17 || l5 != 4 * 31 + 21) + abort (); + if (i4 != 5 || j4 != 23 || n4 != 11 + 6 * 17 || l6 != 4 * 31 + 21) + abort (); + if (i5 != 19) + abort (); + if (p[2] != (32 * 31) / 2 || p[3] != 2 * (32 * 31) / 2 + || q[0] != 3 * (32 * 31) / 2 || q[2] != 4 * (32 * 31) / 2 + || r2[0] != 5 * (32 * 31) / 2 || r2[3] != 6 * (32 * 31) / 2) + abort (); +} + +void +bar (int x, omp_allocator_handle_t h) +{ + int y = 0, r = 0, i, i1, l, l2[4], l3, n = 8; + int i2, j2, n2 = 9, l4; + int i3, j3, n3 = 10, l5; + int i4, j4, n4 = 11, l6; + int i5; + struct S s = { 27, 29 }; + int xo = x; + #pragma omp parallel private (y) firstprivate (x) allocate (x, y) + { + if (x != 42) + abort (); + #pragma omp barrier + y = 1; + x++; + #pragma omp barrier + if (x != 43 || y != 1) + abort (); + } + x = xo; + #pragma omp teams + #pragma omp parallel private (y) firstprivate (x) allocate (h: x, y) + { + if (x != 42) + abort (); + #pragma omp barrier + y = 1; + x++; + #pragma omp barrier + if (x != 43 || y != 1) + abort (); + } + x = xo; + #pragma omp parallel for private (y) firstprivate (x) allocate (h: x, y, r, l, n) reduction(+: r) lastprivate (l) linear (n: 16) + for (i = 0; i < 64; i++) + { + if (x != 42) + abort (); + y = 1; + l = i; + n += y + 15; + r += i; + } + x = xo; + #pragma omp parallel + { + #pragma omp for lastprivate (l2) private (i1) allocate (h: l2, l3, i1) lastprivate (conditional: l3) + for (i1 = 0; i1 < 64; i1++) + { + l2[0] = i1; + l2[1] = i1 + 1; + l2[2] = i1 + 2; + l2[3] = i1 + 3; + if (i1 < 37) + l3 = i1; + } + #pragma omp for collapse(2) lastprivate(l4, i2, j2) linear (n2:17) allocate (h: n2, l4, i2, j2) + for (i2 = 3; i2 < 5; i2++) + for (j2 = 17; j2 < 22; j2 += 2) + { + n2 += 17; + l4 = i2 * 31 + j2; + } + #pragma omp for collapse(2) lastprivate(l5, i3, j3) linear (n3:17) schedule (static, 3) allocate (n3, l5, i3, j3) + for (i3 = 3; i3 < 5; i3++) + for (j3 = 17; j3 < 23; j3 += 2) + { + n3 += 17; + l5 = i3 * 31 + j3; + } + #pragma omp for collapse(2) lastprivate(l6, i4, j4) linear (n4:17) schedule (dynamic) allocate (h: n4, l6, i4, j4) + for (i4 = 3; i4 < 5; i4++) + for (j4 = 17; j4 < 22; j4 += 2) + { + n4 += 17; + l6 = i4 * 31 + j4; + } + #pragma omp for lastprivate (i5) allocate (i5) + for (i5 = 1; i5 < 17; i5 += 3) + ; + #pragma omp task private(y) firstprivate(x) allocate(x, y) + { + if (x != 42) + abort (); + x++; + y = 21; + if (x != 43 || y != 21) + abort (); + } + #pragma omp task private(y) firstprivate(x) allocate(h: x, y) + { + if (x != 42) + abort (); + x++; + y = 21; + if (x != 43 || y != 21) + abort (); + } + #pragma omp task private(y) firstprivate(s) allocate(s, y) + { + if (s.a != 27 || s.b != 29) + abort (); + s.a++; + s.b++; + y = 21; + if (s.a != 28 || s.b != 30 || y != 21) + abort (); + } + #pragma omp task private(y) firstprivate(s) allocate(h: s, y) + { + if (s.a != 27 || s.b != 29) + abort (); + s.a++; + s.b++; + y = 21; + if (s.a != 28 || s.b != 30 || y != 21) + abort (); + } + } + if (r != 64 * 63 / 2 || l != 63 || n != 8 + 16 * 64) + abort (); + if (l2[0] != 63 || l2[1] != 63 + 1 || l2[2] != 63 + 2 || l2[3] != 63 + 3 || l3 != 36) + abort (); + if (i2 != 5 || j2 != 23 || n2 != 9 + 6 * 17 || l4 != 4 * 31 + 21) + abort (); + if (i3 != 5 || j3 != 23 || n3 != 10 + 6 * 17 || l5 != 4 * 31 + 21) + abort (); + if (i4 != 5 || j4 != 23 || n4 != 11 + 6 * 17 || l6 != 4 * 31 + 21) + abort (); + if (i5 != 19) + abort (); +} + +int +main () +{ + omp_alloctrait_t traits[3] + = { { omp_atk_alignment, 64 }, + { omp_atk_fallback, omp_atv_null_fb } }; + omp_allocator_handle_t a + = omp_init_allocator (omp_default_mem_space, 2, traits); + int p[4], q[3]; + if (a == omp_null_allocator) + abort (); + omp_set_default_allocator (omp_default_mem_alloc); + foo (42, p, q, 2, omp_null_allocator, 0); + foo (42, p, q, 2, omp_default_mem_alloc, 0); + foo (42, p, q, 2, a, 1); + omp_set_default_allocator (a); + foo (42, p, q, 2, omp_null_allocator, 3); + foo (42, p, q, 2, omp_default_mem_alloc, 2); + bar (42, a); + omp_destroy_allocator (a); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/ptr-attach-1.c b/libgomp/testsuite/libgomp.c-c++-common/ptr-attach-1.c new file mode 100644 index 0000000..e7deec6 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/ptr-attach-1.c @@ -0,0 +1,82 @@ +#include <stdlib.h> + +struct S +{ + int a, b; + int *ptr; + int c, d; +}; +typedef struct S S; + +#pragma omp declare target +int *gp; +#pragma omp end declare target + +#define N 10 +int main (void) +{ + /* Test to see if pointer attachment works, for scalar pointers, + and pointer fields in structures. */ + + int *ptr = (int *) malloc (sizeof (int) * N); + int *orig_ptr = ptr; + + #pragma omp target map (ptr, ptr[:N]) + { + for (int i = 0; i < N; i++) + ptr[i] = N - i; + } + + if (ptr != orig_ptr) + abort (); + + for (int i = 0; i < N; i++) + if (ptr[i] != N - i) + abort (); + + S s = { 0 }; + s.ptr = ptr; + #pragma omp target map (s, s.ptr[:N]) + { + for (int i = 0; i < N; i++) + s.ptr[i] = i; + + s.a = 1; + s.b = 2; + } + + if (s.ptr != ptr) + abort (); + + for (int i = 0; i < N; i++) + if (s.ptr[i] != i) + abort (); + + if (s.a != 1 || s.b != 2 || s.c != 0 || s.d != 0) + abort (); + + gp = (int *) malloc (sizeof (int) * N); + orig_ptr = gp; + + for (int i = 0; i < N; i++) + gp[i] = i - 1; + + #pragma omp target map (gp[:N]) + { + for (int i = 0; i < N; i++) + gp[i] += 1; + } + + if (gp != orig_ptr) + abort (); + + for (int i = 0; i < N; i++) + if (gp[i] != i) + abort (); + + free (ptr); + free (gp); + + return 0; +} + diff --git a/libgomp/testsuite/libgomp.c/affinity-1.c b/libgomp/testsuite/libgomp.c/affinity-1.c index aeb0f4d..4c9f9d1 100644 --- a/libgomp/testsuite/libgomp.c/affinity-1.c +++ b/libgomp/testsuite/libgomp.c/affinity-1.c @@ -17,7 +17,8 @@ /* { dg-do run } */ /* { dg-set-target-env-var OMP_PROC_BIND "false" } */ -/* { dg-additional-options "-DINTERPOSE_GETAFFINITY -DDO_FORK -ldl" { target *-*-linux* } } */ +/* { dg-additional-options "-Wno-deprecated-declarations" } */ +/* { dg-additional-options "-DINTERPOSE_GETAFFINITY -DDO_FORK -ldl -Wno-deprecated-declarations" { target *-*-linux* } } */ #ifndef _GNU_SOURCE #define _GNU_SOURCE diff --git a/libgomp/testsuite/libgomp.c/affinity-2.c b/libgomp/testsuite/libgomp.c/affinity-2.c index f821657..8e5bb56 100644 --- a/libgomp/testsuite/libgomp.c/affinity-2.c +++ b/libgomp/testsuite/libgomp.c/affinity-2.c @@ -2,6 +2,7 @@ /* { dg-set-target-env-var OMP_PROC_BIND "spread,close" } */ /* { dg-set-target-env-var OMP_PLACES "{6,7}:4:-2,!{2,3}" } */ /* { dg-set-target-env-var OMP_NUM_THREADS "2" } */ +/* { dg-additional-options "-Wno-deprecated-declarations" } */ #include <omp.h> #include <stdlib.h> diff --git a/libgomp/testsuite/libgomp.c/appendix-a/a.15.1.c b/libgomp/testsuite/libgomp.c/appendix-a/a.15.1.c index c167dab..469da6a 100644 --- a/libgomp/testsuite/libgomp.c/appendix-a/a.15.1.c +++ b/libgomp/testsuite/libgomp.c/appendix-a/a.15.1.c @@ -1,4 +1,5 @@ /* { dg-do run } */ +/* { dg-additional-options "-Wno-deprecated-declarations" } */ #include <omp.h> #include <stdio.h> diff --git a/libgomp/testsuite/libgomp.c/declare-variant-1.c b/libgomp/testsuite/libgomp.c/declare-variant-1.c new file mode 100644 index 0000000..d16608f --- /dev/null +++ b/libgomp/testsuite/libgomp.c/declare-variant-1.c @@ -0,0 +1,54 @@ +/* { dg-do link { target vect_simd_clones } } */ +/* { dg-require-effective-target lto } */ +/* { dg-require-effective-target fpic } */ +/* { dg-require-effective-target shared } */ +/* { dg-additional-options "-fdump-tree-gimple -fdump-tree-optimized -O2 -fPIC -shared -flto -flto-partition=one" } */ +/* { dg-additional-options "-mno-sse3" { target { i?86-*-* x86_64-*-* } } } */ + +int +f01 (int a) +{ + asm volatile ("" : "+g" (a) : "g" (1) : "memory"); + return a; +} + +int +f02 (int a) +{ + asm volatile ("" : "+g" (a) : "g" (2) : "memory"); + return a; +} + +int +f03 (int a) +{ + asm volatile ("" : "+g" (a) : "g" (3) : "memory"); + return a; +} + +#pragma omp declare variant (f01) match (device={isa("avx512f")}) /* 4 or 8 */ +#pragma omp declare variant (f02) match (implementation={vendor(score(3):gnu)},device={kind(cpu)}) /* (1 or 2) + 3 */ +#pragma omp declare variant (f03) match (implementation={vendor(score(5):gnu)},device={kind(host)}) /* (1 or 2) + 5 */ +int +f04 (int a) +{ + asm volatile ("" : "+g" (a) : "g" (4) : "memory"); + return a; +} + +#pragma omp declare simd +int +test1 (int x) +{ + /* At gimplification time, we can't decide yet which function to call. */ + /* { dg-final { scan-tree-dump-times "f04 \\\(x" 2 "gimple" } } */ + /* After simd clones are created, the original non-clone test1 shall + call f03 (score 6), the sse2/avx/avx2 clones too, but avx512f clones + shall call f01 with score 8. */ + /* { dg-final { scan-ltrans-tree-dump-not "f04 \\\(x" "optimized" } } */ + /* { dg-final { scan-ltrans-tree-dump-times "f03 \\\(x" 14 "optimized" } } */ + /* { dg-final { scan-ltrans-tree-dump-times "f01 \\\(x" 4 "optimized" } } */ + int a = f04 (x); + int b = f04 (x); + return a + b; +} diff --git a/libgomp/testsuite/libgomp.c/lib-1.c b/libgomp/testsuite/libgomp.c/lib-1.c index 086036d..fc6f746 100644 --- a/libgomp/testsuite/libgomp.c/lib-1.c +++ b/libgomp/testsuite/libgomp.c/lib-1.c @@ -1,3 +1,5 @@ +/* { dg-additional-options "-Wno-deprecated-declarations" } */ + #include <stdlib.h> #include <omp.h> diff --git a/libgomp/testsuite/libgomp.c/nested-1.c b/libgomp/testsuite/libgomp.c/nested-1.c index d3cfb01..925f5c5 100644 --- a/libgomp/testsuite/libgomp.c/nested-1.c +++ b/libgomp/testsuite/libgomp.c/nested-1.c @@ -1,3 +1,5 @@ +/* { dg-additional-options "-Wno-deprecated-declarations" } */ + #include <omp.h> #include <stdlib.h> diff --git a/libgomp/testsuite/libgomp.c/nested-2.c b/libgomp/testsuite/libgomp.c/nested-2.c index f52b074..ab113f1 100644 --- a/libgomp/testsuite/libgomp.c/nested-2.c +++ b/libgomp/testsuite/libgomp.c/nested-2.c @@ -1,3 +1,5 @@ +/* { dg-additional-options "-Wno-deprecated-declarations" } */ + #include <omp.h> #include <stdlib.h> diff --git a/libgomp/testsuite/libgomp.c/nested-3.c b/libgomp/testsuite/libgomp.c/nested-3.c index 6186006..625ca88 100644 --- a/libgomp/testsuite/libgomp.c/nested-3.c +++ b/libgomp/testsuite/libgomp.c/nested-3.c @@ -1,3 +1,5 @@ +/* { dg-additional-options "-Wno-deprecated-declarations" } */ + #include <omp.h> #include <stdlib.h> #include <string.h> diff --git a/libgomp/testsuite/libgomp.c/pr32362-1.c b/libgomp/testsuite/libgomp.c/pr32362-1.c index 55807e3..33890df 100644 --- a/libgomp/testsuite/libgomp.c/pr32362-1.c +++ b/libgomp/testsuite/libgomp.c/pr32362-1.c @@ -1,5 +1,6 @@ /* PR middle-end/32362 */ /* { dg-do run } */ +/* { dg-additional-options "-Wno-deprecated-declarations" } */ #include <omp.h> #include <stdlib.h> diff --git a/libgomp/testsuite/libgomp.c/pr32362-2.c b/libgomp/testsuite/libgomp.c/pr32362-2.c index d4ce091..445ccbf 100644 --- a/libgomp/testsuite/libgomp.c/pr32362-2.c +++ b/libgomp/testsuite/libgomp.c/pr32362-2.c @@ -1,5 +1,6 @@ /* PR middle-end/32362 */ /* { dg-do run } */ +/* { dg-additional-options "-Wno-deprecated-declarations" } */ #include <omp.h> #include <stdlib.h> diff --git a/libgomp/testsuite/libgomp.c/pr32362-3.c b/libgomp/testsuite/libgomp.c/pr32362-3.c index 11a0a0a..1d0b19c 100644 --- a/libgomp/testsuite/libgomp.c/pr32362-3.c +++ b/libgomp/testsuite/libgomp.c/pr32362-3.c @@ -1,5 +1,6 @@ /* PR middle-end/32362 */ /* { dg-do run } */ +/* { dg-additional-options "-Wno-deprecated-declarations" } */ #include <omp.h> #include <stdlib.h> diff --git a/libgomp/testsuite/libgomp.c/pr35549.c b/libgomp/testsuite/libgomp.c/pr35549.c index 269a0c2..a492c56 100644 --- a/libgomp/testsuite/libgomp.c/pr35549.c +++ b/libgomp/testsuite/libgomp.c/pr35549.c @@ -1,5 +1,6 @@ /* PR middle-end/35549 */ /* { dg-do run } */ +/* { dg-additional-options "-Wno-deprecated-declarations" } */ #include <omp.h> #include <stdlib.h> diff --git a/libgomp/testsuite/libgomp.c/pr42942.c b/libgomp/testsuite/libgomp.c/pr42942.c index 5d57852..260dd90 100644 --- a/libgomp/testsuite/libgomp.c/pr42942.c +++ b/libgomp/testsuite/libgomp.c/pr42942.c @@ -1,5 +1,6 @@ /* PR libgomp/42942 */ /* { dg-do run } */ +/* { dg-additional-options "-Wno-deprecated-declarations" } */ #include <omp.h> #include <stdlib.h> diff --git a/libgomp/testsuite/libgomp.c/pr61200.c b/libgomp/testsuite/libgomp.c/pr61200.c index ba3ed37..c98c10a 100644 --- a/libgomp/testsuite/libgomp.c/pr61200.c +++ b/libgomp/testsuite/libgomp.c/pr61200.c @@ -1,5 +1,6 @@ /* PR libgomp/61200 */ /* { dg-do run } */ +/* { dg-additional-options "-Wno-deprecated-declarations" } */ #include <omp.h> #include <stdlib.h> diff --git a/libgomp/testsuite/libgomp.c/sort-1.c b/libgomp/testsuite/libgomp.c/sort-1.c index f706fa2..bcabea5 100644 --- a/libgomp/testsuite/libgomp.c/sort-1.c +++ b/libgomp/testsuite/libgomp.c/sort-1.c @@ -15,6 +15,8 @@ along with GCC; see the file COPYING3. If not see <http://www.gnu.org/licenses/>. */ +/* { dg-additional-options "-Wno-deprecated-declarations" } */ + #include <limits.h> #include <omp.h> #include <stdbool.h> diff --git a/libgomp/testsuite/libgomp.c/target-32.c b/libgomp/testsuite/libgomp.c/target-32.c index 233877b..1444523 100644 --- a/libgomp/testsuite/libgomp.c/target-32.c +++ b/libgomp/testsuite/libgomp.c/target-32.c @@ -1,5 +1,6 @@ #include <stdlib.h> #include <unistd.h> +#include "usleep.h" int main () { @@ -18,28 +19,28 @@ int main () #pragma omp target nowait map(alloc: b) depend(in: d[2]) depend(out: d[3]) { - usleep (1000); + tgt_usleep (1000); #pragma omp atomic update b |= 4; } #pragma omp target nowait map(alloc: b) depend(in: d[2]) depend(out: d[4]) { - usleep (5000); + tgt_usleep (5000); #pragma omp atomic update b |= 1; } #pragma omp target nowait map(alloc: c) depend(in: d[3], d[4]) depend(out: d[5]) { - usleep (5000); + tgt_usleep (5000); #pragma omp atomic update c |= 8; } #pragma omp target nowait map(alloc: c) depend(in: d[3], d[4]) depend(out: d[6]) { - usleep (1000); + tgt_usleep (1000); #pragma omp atomic update c |= 2; } diff --git a/libgomp/testsuite/libgomp.c/target-42.c b/libgomp/testsuite/libgomp.c/target-42.c new file mode 100644 index 0000000..fc0e265 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-42.c @@ -0,0 +1,42 @@ +#include <stdio.h> + +int +on_nvptx (void) +{ + return 1; +} + +int +on_gcn (void) +{ + return 2; +} + +#pragma omp declare variant (on_nvptx) match(construct={target},device={arch(nvptx)}) +#pragma omp declare variant (on_gcn) match(construct={target},device={arch(gcn)}) +int +on (void) +{ + return 0; +} + +int +main () +{ + int v; + #pragma omp target map(from:v) + v = on (); + switch (v) + { + default: + printf ("Host fallback or unknown offloading\n"); + break; + case 1: + printf ("Offloading to NVidia PTX\n"); + break; + case 2: + printf ("Offloading to AMD GCN\n"); + break; + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/target-5.c b/libgomp/testsuite/libgomp.c/target-5.c index 4367443..21a69ea 100644 --- a/libgomp/testsuite/libgomp.c/target-5.c +++ b/libgomp/testsuite/libgomp.c/target-5.c @@ -1,3 +1,5 @@ +/* { dg-additional-options "-Wno-deprecated-declarations" } */ + #include <omp.h> #include <stdlib.h> diff --git a/libgomp/testsuite/libgomp.c/target-6.c b/libgomp/testsuite/libgomp.c/target-6.c index ea35aa4..8ffcb5b 100644 --- a/libgomp/testsuite/libgomp.c/target-6.c +++ b/libgomp/testsuite/libgomp.c/target-6.c @@ -1,3 +1,5 @@ +/* { dg-additional-options "-Wno-deprecated-declarations" } */ + #include <omp.h> #include <stdlib.h> diff --git a/libgomp/testsuite/libgomp.c/task-6.c b/libgomp/testsuite/libgomp.c/task-6.c new file mode 100644 index 0000000..e5fc758 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/task-6.c @@ -0,0 +1,47 @@ +#include <stdlib.h> +#include <unistd.h> + +int +main () +{ + int x = 0, y = 0; + #pragma omp parallel shared(x, y) + #pragma omp master + { + #pragma omp task depend(out:y) shared(x, y) + { + sleep (1); + x = 1; + y = 1; + } + #pragma omp task depend(inout:y) shared(x, y) + { + if (x != 1 || y != 1) + abort (); + y++; + } + } + if (x != 1 || y != 2) + abort (); + x = 0; + y = 0; + #pragma omp parallel + #pragma omp master + { + #pragma omp task depend(out:y) + { + sleep (1); + x = 1; + y = 1; + } + #pragma omp task depend(inout:y) + { + if (x != 1 || y != 1) + abort (); + y++; + } + } + if (x != 1 || y != 2) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/teams-1.c b/libgomp/testsuite/libgomp.c/teams-1.c index c5df837..977e5fc 100644 --- a/libgomp/testsuite/libgomp.c/teams-1.c +++ b/libgomp/testsuite/libgomp.c/teams-1.c @@ -1,3 +1,5 @@ +/* { dg-additional-options "-Wno-deprecated-declarations" } */ + #include <omp.h> #include <stdlib.h> diff --git a/libgomp/testsuite/libgomp.c/thread-limit-1.c b/libgomp/testsuite/libgomp.c/thread-limit-1.c index 1d9794a..c8f76f9 100644 --- a/libgomp/testsuite/libgomp.c/thread-limit-1.c +++ b/libgomp/testsuite/libgomp.c/thread-limit-1.c @@ -1,5 +1,6 @@ /* { dg-do run } */ /* { dg-set-target-env-var OMP_THREAD_LIMIT "6" } */ +/* { dg-additional-options "-Wno-deprecated-declarations" } */ #include <stdlib.h> #include <unistd.h> diff --git a/libgomp/testsuite/libgomp.c/thread-limit-2.c b/libgomp/testsuite/libgomp.c/thread-limit-2.c index 1a97fb6..2cff1fd 100644 --- a/libgomp/testsuite/libgomp.c/thread-limit-2.c +++ b/libgomp/testsuite/libgomp.c/thread-limit-2.c @@ -1,9 +1,11 @@ /* { dg-do run } */ /* { dg-set-target-env-var OMP_THREAD_LIMIT "9" } */ +/* { dg-additional-options "-Wno-deprecated-declarations" } */ #include <stdlib.h> #include <unistd.h> #include <omp.h> +#include "usleep.h" int main () @@ -48,7 +50,7 @@ main () v = ++cnt; if (v > 6) abort (); - usleep (10000); + tgt_usleep (10000); #pragma omp atomic --cnt; } diff --git a/libgomp/testsuite/libgomp.c/thread-limit-4.c b/libgomp/testsuite/libgomp.c/thread-limit-4.c index 5642e6a..351423c 100644 --- a/libgomp/testsuite/libgomp.c/thread-limit-4.c +++ b/libgomp/testsuite/libgomp.c/thread-limit-4.c @@ -1,5 +1,6 @@ /* { dg-do run } */ /* { dg-set-target-env-var OMP_THREAD_LIMIT "9" } */ +/* { dg-additional-options "-Wno-deprecated-declarations" } */ #include <stdlib.h> #include <unistd.h> diff --git a/libgomp/testsuite/libgomp.c/usleep.h b/libgomp/testsuite/libgomp.c/usleep.h new file mode 100644 index 0000000..669b41c --- /dev/null +++ b/libgomp/testsuite/libgomp.c/usleep.h @@ -0,0 +1,25 @@ +#include <unistd.h> + +int +fallback_usleep (useconds_t d) +{ + /* This function serves as a replacement for usleep in + this test case. It does not even attempt to be functionally + equivalent - we just want some sort of delay. */ + int i; + int N = d * 2000; + for (i = 0; i < N; i++) + asm volatile ("" : : : "memory"); + return 0; +} + +#pragma omp declare variant (fallback_usleep) match(construct={target},device={arch(nvptx)}) +#pragma omp declare variant (fallback_usleep) match(construct={target},device={arch(gcn)}) +#pragma omp declare variant (usleep) match(user={condition(1)}) +int +tgt_usleep (useconds_t d) +{ + return 0; +} + +#pragma omp declare target to (fallback_usleep, tgt_usleep) diff --git a/libgomp/testsuite/libgomp.fortran/affinity1.f90 b/libgomp/testsuite/libgomp.fortran/affinity1.f90 index 26b5185..ea84b83 100644 --- a/libgomp/testsuite/libgomp.fortran/affinity1.f90 +++ b/libgomp/testsuite/libgomp.fortran/affinity1.f90 @@ -3,6 +3,7 @@ ! { dg-set-target-env-var OMP_PROC_BIND "spread,close" } ! { dg-set-target-env-var OMP_PLACES "{6,7}:4:-2,!{2,3}" } ! { dg-set-target-env-var OMP_NUM_THREADS "2" } +! { dg-additional-options "-Wno-deprecated-declarations" } use omp_lib integer :: num, i, nump diff --git a/libgomp/testsuite/libgomp.fortran/lib1.f90 b/libgomp/testsuite/libgomp.fortran/lib1.f90 index 4e7f9b2..c99eb7b 100644 --- a/libgomp/testsuite/libgomp.fortran/lib1.f90 +++ b/libgomp/testsuite/libgomp.fortran/lib1.f90 @@ -1,4 +1,5 @@ ! { dg-do run } +! { dg-additional-options "-Wno-deprecated-declarations" } use omp_lib diff --git a/libgomp/testsuite/libgomp.fortran/lib2.f b/libgomp/testsuite/libgomp.fortran/lib2.f index 91b56c0..a25611b 100644 --- a/libgomp/testsuite/libgomp.fortran/lib2.f +++ b/libgomp/testsuite/libgomp.fortran/lib2.f @@ -1,4 +1,5 @@ C { dg-do run } +C { dg-additional-options "-Wno-deprecated-declarations" } USE OMP_LIB diff --git a/libgomp/testsuite/libgomp.fortran/nested1.f90 b/libgomp/testsuite/libgomp.fortran/nested1.f90 index f521b7b..bb3d0ed 100644 --- a/libgomp/testsuite/libgomp.fortran/nested1.f90 +++ b/libgomp/testsuite/libgomp.fortran/nested1.f90 @@ -1,4 +1,6 @@ ! { dg-do run } +! { dg-additional-options "-Wno-deprecated-declarations" } + program nested1 use omp_lib integer :: e1, e2, e3, e diff --git a/libgomp/testsuite/libgomp.fortran/scan-1.f90 b/libgomp/testsuite/libgomp.fortran/scan-1.f90 new file mode 100644 index 0000000..a6f8ef7 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/scan-1.f90 @@ -0,0 +1,115 @@ +! { dg-require-effective-target size32plus } + +module m + implicit none + integer r, a(1024), b(1024) +contains +subroutine foo (a, b) + integer, contiguous :: a(:), b(:) + integer :: i + !$omp do reduction (inscan, +:r) + do i = 1, 1024 + r = r + a(i) + !$omp scan inclusive(r) + b(i) = r + end do +end + +integer function bar () + integer s, i + s = 0 + !$omp parallel + !$omp do reduction (inscan, +:s) + do i = 1, 1024 + s = s + 2 * a(i) + !$omp scan inclusive(s) + b(i) = s + end do + !$omp end parallel + bar = s +end + +subroutine baz (a, b) + integer, contiguous :: a(:), b(:) + integer :: i + !$omp parallel do reduction (inscan, +:r) + do i = 1, 1024 + r = r + a(i) + !$omp scan inclusive(r) + b(i) = r + end do +end + +integer function qux () + integer s, i + s = 0 + !$omp parallel do reduction (inscan, +:s) + do i = 1, 1024 + s = s + 2 * a(i) + !$omp scan inclusive(s) + b(i) = s + end do + qux = s +end +end module m + +program main + use m + implicit none + + integer s, i + s = 0 + do i = 1, 1024 + a(i) = i-1 + b(i) = -1 + end do + + !$omp parallel + call foo (a, b) + !$omp end parallel + if (r /= 1024 * 1023 / 2) & + stop 1 + do i = 1, 1024 + s = s + i-1 + if (b(i) /= s) then + stop 2 + else + b(i) = 25 + endif + end do + + if (bar () /= 1024 * 1023) & + stop 3 + s = 0 + do i = 1, 1024 + s = s + 2 * (i-1) + if (b(i) /= s) then + stop 4 + else + b(i) = -1 + end if + end do + + r = 0 + call baz (a, b) + if (r /= 1024 * 1023 / 2) & + stop 5 + s = 0 + do i = 1, 1024 + s = s + i-1 + if (b(i) /= s) then + stop 6 + else + b(i) = -25 + endif + end do + + if (qux () /= 1024 * 1023) & + stop 6 + s = 0 + do i = 1, 1024 + s = s + 2 * (i-1) + if (b(i) /= s) & + stop 7 + end do +end program diff --git a/libgomp/testsuite/libgomp.fortran/teams1.f90 b/libgomp/testsuite/libgomp.fortran/teams1.f90 index 4f14607..0077a70 100644 --- a/libgomp/testsuite/libgomp.fortran/teams1.f90 +++ b/libgomp/testsuite/libgomp.fortran/teams1.f90 @@ -1,3 +1,5 @@ +! { dg-additional-options "-Wno-deprecated-declarations" } + program teams1 use omp_lib !$omp teams thread_limit (2) diff --git a/libgomp/testsuite/libgomp.oacc-c++/cache-1.C b/libgomp/testsuite/libgomp.oacc-c++/cache-1.C new file mode 100644 index 0000000..fcb1f84 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/cache-1.C @@ -0,0 +1,13 @@ +/* OpenACC 'cache' directive. */ + +/* See also corresponding C/C++ variant '../libgomp.oacc-c-c++-common/cache-1.c'. */ + +#include "../../../gcc/testsuite/g++.dg/goacc/cache-1.C" + +int +main (int argc, char *argv[]) +{ + test<0> (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/cache-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/cache-1.c index 16aaed5..c0dddb3 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/cache-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/cache-1.c @@ -1,3 +1,13 @@ -/* OpenACC cache directive. */ +/* OpenACC 'cache' directive. */ + +/* See also corresponding C++ variant '../libgomp.oacc-c++/cache-1.C'. */ #include "../../../gcc/testsuite/c-c++-common/goacc/cache-1.c" + +int +main (int argc, char *argv[]) +{ + test (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-vla-kernels-decompose-ice-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-vla-kernels-decompose-ice-1.c new file mode 100644 index 0000000..c7eae12 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-vla-kernels-decompose-ice-1.c @@ -0,0 +1,8 @@ +/* { dg-additional-options "-fopenacc-kernels=decompose" } */ +/* Hopefully, this is the same issue as '../../../gcc/testsuite/c-c++-common/goacc/kernels-decompose-ice-1.c'. + { dg-ice "TODO" } + TODO { dg-prune-output "during GIMPLE pass: omplower" } + TODO { dg-do link } */ + +#undef KERNELS_DECOMPOSE_ICE_HACK +#include "declare-vla.c" diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-vla-kernels-decompose.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-vla-kernels-decompose.c new file mode 100644 index 0000000..dd8a1c1 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-vla-kernels-decompose.c @@ -0,0 +1,6 @@ +/* { dg-additional-options "-fopenacc-kernels=decompose" } */ + +/* See also 'declare-vla-kernels-decompose-ice-1.c'. */ + +#define KERNELS_DECOMPOSE_ICE_HACK +#include "declare-vla.c" diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-vla.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-vla.c index 7149357..3bd6331 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-vla.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-vla.c @@ -38,6 +38,12 @@ f_data (void) for (i = 0; i < N; i++) A[i] = -i; + /* See 'declare-vla-kernels-decompose.c'. */ +#ifdef KERNELS_DECOMPOSE_ICE_HACK + (volatile int *) &i; + (volatile int *) &N; +#endif + # pragma acc kernels for (i = 0; i < N; i++) A[i] = i; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-decompose-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-decompose-1.c new file mode 100644 index 0000000..e76e409 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-decompose-1.c @@ -0,0 +1,46 @@ +/* Test OpenACC 'kernels' construct decomposition. */ + +/* { dg-additional-options "-fopt-info-omp-all" } */ +/* { dg-additional-options "-fopenacc-kernels=decompose" } */ + +/* It's only with Tcl 8.5 (released in 2007) that "the variable 'varName' + passed to 'incr' may be unset, and in that case, it will be set to [...]", + so to maintain compatibility with earlier Tcl releases, we manually + initialize counter variables: + { dg-line l_dummy[variable c_loop_i 0] } + { dg-message "dummy" "" { target iN-VAl-Id } l_dummy } to avoid + "WARNING: dg-line var l_dummy defined, but not used". */ + +#undef NDEBUG +#include <assert.h> + +int main() +{ + int a = 0; + /*TODO Without making 'a' addressable, for GCN offloading we will not see the expected value copied out. (But it does work for nvptx offloading, strange...) */ + (volatile int *) &a; +#define N 123 + int b[N] = { 0 }; + +#pragma acc kernels + { + int c = 234; /* { dg-message "note: beginning 'gang-single' part in OpenACC 'kernels' region" } */ + + /*TODO Hopefully, this is the same issue as '../../../gcc/testsuite/c-c++-common/goacc/kernels-decompose-ice-1.c'. */ + (volatile int *) &c; + +#pragma acc loop independent gang /* { dg-line l_loop_i[incr c_loop_i] } */ + /* { dg-message "note: parallelized loop nest in OpenACC 'kernels' region" "" { target *-*-* } l_loop_i$c_loop_i } */ + /* { dg-optimized "assigned OpenACC gang loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i } */ + for (int i = 0; i < N; ++i) + b[i] = c; + + a = c; /* { dg-message "note: beginning 'gang-single' part in OpenACC 'kernels' region" } */ + } + + for (int i = 0; i < N; ++i) + assert (b[i] == 234); + assert (a == 234); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c index f6ca263..d453264 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c @@ -1,52 +1,11 @@ /* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-DVECTOR_LENGTH=" } */ /* { dg-additional-options "-fopenacc-dim=::128" } */ -/* Minimized from ref-1.C. */ +/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */ +/* { dg-set-target-env-var "GOMP_DEBUG" "1" } */ -#include <stdio.h> +#include "pr85486.c" -#pragma acc routine vector -void __attribute__((noinline, noclone)) -Vector (int *ptr, int n, const int inc) -{ - #pragma acc loop vector - for (unsigned ix = 0; ix < n; ix++) - ptr[ix] += inc; -} - -int -main (void) -{ - const int n = 32, m=32; - - int ary[m][n]; - unsigned ix, iy; - - for (ix = m; ix--;) - for (iy = n; iy--;) - ary[ix][iy] = (1 << 16) + (ix << 8) + iy; - - int err = 0; - -#pragma acc parallel copy (ary) - { - Vector (&ary[0][0], m * n, (1 << 24) - (1 << 16)); - } - - for (ix = m; ix--;) - for (iy = n; iy--;) - if (ary[ix][iy] != ((1 << 24) + (ix << 8) + iy)) - { - printf ("ary[%u][%u] = %x expected %x\n", - ix, iy, ary[ix][iy], ((1 << 24) + (ix << 8) + iy)); - err++; - } - - if (err) - { - printf ("%d failed\n", err); - return 1; - } - - return 0; -} +/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 32\\)" "oaccdevlow" } } */ +/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=1, vectors=32" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-3.c index a959b90..33480a4 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-3.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-3.c @@ -1,54 +1,11 @@ /* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-DVECTOR_LENGTH=" } */ /* { dg-set-target-env-var "GOMP_OPENACC_DIM" "::128" } */ -/* Minimized from ref-1.C. */ +/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */ +/* { dg-set-target-env-var "GOMP_DEBUG" "1" } */ -#include <stdio.h> +#include "pr85486.c" -#pragma acc routine vector -void __attribute__((noinline, noclone)) -Vector (int *ptr, int n, const int inc) -{ - #pragma acc loop vector - for (unsigned ix = 0; ix < n; ix++) - ptr[ix] += inc; -} - -int -main (void) -{ - const int n = 32, m=32; - - int ary[m][n]; - unsigned ix, iy; - - for (ix = m; ix--;) - for (iy = n; iy--;) - ary[ix][iy] = (1 << 16) + (ix << 8) + iy; - - int err = 0; - -#pragma acc parallel copy (ary) - { - Vector (&ary[0][0], m * n, (1 << 24) - (1 << 16)); - } - - for (ix = m; ix--;) - for (iy = n; iy--;) - if (ary[ix][iy] != ((1 << 24) + (ix << 8) + iy)) - { - printf ("ary[%u][%u] = %x expected %x\n", - ix, iy, ary[ix][iy], ((1 << 24) + (ix << 8) + iy)); - err++; - } - - if (err) - { - printf ("%d failed\n", err); - return 1; - } - - return 0; -} - -/* { dg-prune-output "using vector_length \\(32\\), ignoring runtime setting" } */ +/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 32\\)" "oaccdevlow" } } */ +/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=1, vectors=32" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486.c index 99c0805..0d98b82 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486.c @@ -1,4 +1,8 @@ /* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-DVECTOR_LENGTH=vector_length(128)" } */ + +/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */ +/* { dg-set-target-env-var "GOMP_DEBUG" "1" } */ /* Minimized from ref-1.C. */ @@ -27,7 +31,7 @@ main (void) int err = 0; -#pragma acc parallel copy (ary) vector_length (128) /* { dg-warning "using vector_length \\(32\\) due to call to vector-partitionable routine, ignoring 128" } */ +#pragma acc parallel copy (ary) VECTOR_LENGTH /* { dg-warning "using vector_length \\(32\\) due to call to vector-partitionable routine, ignoring 128" } */ { Vector (&ary[0][0], m * n, (1 << 24) - (1 << 16)); } @@ -49,3 +53,6 @@ main (void) return 0; } + +/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 32\\)" "oaccdevlow" } } */ +/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=1, vectors=32" } */ diff --git a/libgomp/testsuite/libgomp.oacc-fortran/atomic_capture-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/atomic_capture-1.f90 index 536b3f0..0b923d5 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/atomic_capture-1.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/atomic_capture-1.f90 @@ -299,7 +299,7 @@ program main ! At most one iarr element can be 0. do i = 1, N if ((iarr(i) == 0 .and. i /= itmp) & - .or. iarr(i) < 0 .or. iarr(i) >= N) STOP 35 + .or. iarr(i) < 0 .or. iarr(i) > N) STOP 35 end do if (igot /= iexp) STOP 36 @@ -336,7 +336,7 @@ program main !$acc parallel loop copy (igot, itmp) do i = 0, N - 1 - iexpr = ibclr (-2, i) + iexpr = ibclr (-1, i) !$acc atomic capture iarr(i) = igot igot = iand (igot, iexpr) @@ -345,7 +345,7 @@ program main !$acc end parallel loop do i = 1, N - if (.not. (iarr(i - 1) < 0)) STOP 39 + if (.not. (popcnt(iarr(i - 1)) > 0)) STOP 39 end do if (igot /= iexp) STOP 40 @@ -363,7 +363,7 @@ program main !$acc end parallel loop do i = 1, N - if (.not. (iarr(i - 1) >= 0)) STOP 41 + if (.not. (popcnt(iarr(i - 1)) < 32)) STOP 41 end do if (igot /= iexp) STOP 42 @@ -381,7 +381,7 @@ program main !$acc end parallel loop do i = 1, N - if (.not. (iarr(i - 1) < 0)) STOP 43 + if (.not. (popcnt(iarr(i - 1)) > 0)) STOP 43 end do if (igot /= iexp) STOP 44 @@ -398,7 +398,7 @@ program main !$acc end parallel loop do i = 1, N - if (.not. (1 <= iarr(i) .and. iarr(i) < iexp)) STOP 45 + if (.not. (1 <= iarr(i) .and. iarr(i) <= iexp)) STOP 45 end do if (igot /= iexp) STOP 46 @@ -415,7 +415,7 @@ program main !$acc end parallel loop do i = 1, N - if (.not. (iarr(i) == 1 .or. iarr(i) == N)) STOP 47 + if (.not. (iarr(i) >= 1 .or. iarr(i) <= N)) STOP 47 end do if (igot /= iexp) STOP 48 @@ -424,7 +424,7 @@ program main !$acc parallel loop copy (igot, itmp) do i = 0, N - 1 - iexpr = ibclr (-2, i) + iexpr = ibclr (-1, i) !$acc atomic capture iarr(i) = igot igot = iand (iexpr, igot) @@ -433,7 +433,7 @@ program main !$acc end parallel loop do i = 1, N - if (.not. (iarr(i - 1) < 0)) STOP 49 + if (.not. (popcnt(iarr(i - 1)) > 0)) STOP 49 end do if (igot /= iexp) STOP 50 @@ -451,7 +451,7 @@ program main !$acc end parallel loop do i = 1, N - if (.not. (iarr(i - 1) >= 0)) STOP 51 + if (.not. (popcnt(iarr(i - 1)) < 32)) STOP 51 end do if (igot /= iexp) STOP 52 @@ -469,7 +469,7 @@ program main !$acc end parallel loop do i = 1, N - if (.not. (iarr(i - 1) < 0)) STOP 53 + if (.not. (popcnt(iarr(i - 1)) > 0)) STOP 53 end do if (igot /= iexp) STOP 54 @@ -755,7 +755,7 @@ program main !$acc end parallel loop do i = 1, N - if (.not. (iarr(i) == iexp)) STOP 89 + if (.not. (iarr(i) <= i)) STOP 89 end do if (igot /= iexp) STOP 90 @@ -773,7 +773,7 @@ program main !$acc end parallel loop do i = 1, N - if (.not. (iarr(i - 1) <= 0)) STOP 91 + if (.not. (popcnt(iarr(i - 1)) < 32)) STOP 91 end do if (igot /= iexp) STOP 92 @@ -791,7 +791,7 @@ program main !$acc end parallel loop do i = 1, N - if (.not. (iarr(i - 1) >= -1)) STOP 93 + if (.not. (popcnt(iarr(i - 1)) > 0)) STOP 93 end do if (igot /= iexp) STOP 94 @@ -809,7 +809,7 @@ program main !$acc end parallel loop do i = 1, N - if (.not. (iarr(i - 1) <= 0)) STOP 95 + if (.not. (popcnt(iarr(i - 1)) < 32)) STOP 95 end do if (igot /= iexp) STOP 96 @@ -843,7 +843,7 @@ program main !$acc end parallel loop do i = 1, N - if (.not. (iarr(i) == iexp )) STOP 99 + if (.not. (iarr(i) <= i)) STOP 99 end do if (igot /= iexp) STOP 100 @@ -861,7 +861,7 @@ program main !$acc end parallel loop do i = 1, N - if (.not. (iarr(i - 1) <= 0)) STOP 101 + if (.not. (popcnt(iarr(i - 1)) < 32)) STOP 101 end do if (igot /= iexp) STOP 102 @@ -879,7 +879,7 @@ program main !$acc end parallel loop do i = 1, N - if (.not. (iarr(i - 1) >= iexp)) STOP 103 + if (.not. (popcnt(iarr(i - 1)) > 0)) STOP 103 end do if (igot /= iexp) STOP 104 @@ -897,7 +897,7 @@ program main !$acc end parallel loop do i = 1, N - if (.not. (iarr(i - 1) <= iexp)) STOP 105 + if (.not. (popcnt(iarr(i - 1)) < 32)) STOP 105 end do if (igot /= iexp) STOP 106 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/attach-descriptor-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/attach-descriptor-1.f90 index 960b9f9..2701192 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/attach-descriptor-1.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/attach-descriptor-1.f90 @@ -42,9 +42,8 @@ subroutine test(variant) stop 1 end if - ! FIXME: This warning is emitted on the wrong line number. - ! { dg-warning "using vector_length \\(32\\), ignoring 1" "" { target openacc_nvidia_accel_selected } 52 } !$acc serial present(myvar%arr2) + ! { dg-warning "using vector_length \\(32\\), ignoring 1" "" { target openacc_nvidia_accel_selected } .-1 } do i=1,10 myvar%arr1(i) = i + variant myvar%arr2(i) = i - variant diff --git a/libgomp/testsuite/libgomp.oacc-fortran/pr94358-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/pr94358-1.f90 new file mode 100644 index 0000000..99a7041 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/pr94358-1.f90 @@ -0,0 +1,47 @@ +! { dg-do run } +! { dg-additional-options "-fopt-info-omp-all" } +! { dg-additional-options "-fopenacc-kernels=decompose" } + +! It's only with Tcl 8.5 (released in 2007) that "the variable 'varName' +! passed to 'incr' may be unset, and in that case, it will be set to [...]", +! so to maintain compatibility with earlier Tcl releases, we manually +! initialize counter variables: +! { dg-line l_dummy[variable c_loop_i 0] } +! { dg-message "dummy" "" { target iN-VAl-Id } l_dummy } to avoid +! "WARNING: dg-line var l_dummy defined, but not used". + +subroutine kernel(lo, hi, a, b, c) + implicit none + integer :: lo, hi, i + real, dimension(lo:hi) :: a, b, c + + !$acc kernels copyin(lo, hi) + !$acc loop independent ! { dg-line l_loop_i[incr c_loop_i] } + ! { dg-message "note: parallelized loop nest in OpenACC 'kernels' region" "" { target *-*-* } l_loop_i$c_loop_i } + ! { dg-optimized "assigned OpenACC gang vector loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i } + do i = lo, hi + b(i) = a(i) + end do + !$acc loop independent ! { dg-line l_loop_i[incr c_loop_i] } + ! { dg-message "note: parallelized loop nest in OpenACC 'kernels' region" "" { target *-*-* } l_loop_i$c_loop_i } + ! { dg-optimized "assigned OpenACC gang vector loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i } + do i = lo, hi + c(i) = b(i) + end do + !$acc end kernels +end subroutine kernel + +program main + integer :: n = 20 + real, dimension(1:20) :: a, b, c + + a(:) = 1 + b(:) = 2 + c(:) = 3 + + call kernel(1, n, a, b, c) + + do i = 1, n + if (c(i) .ne. 1) call abort + end do +end program main |