diff options
author | Jakub Jelinek <jakub@redhat.com> | 2013-10-11 11:26:50 +0200 |
---|---|---|
committer | Jakub Jelinek <jakub@gcc.gnu.org> | 2013-10-11 11:26:50 +0200 |
commit | acf0174b6f7a3f8fe1e2a27361bbf87dfe454530 (patch) | |
tree | 2a3d60fbf15f9346c02647762dcc441fe3841855 /libgomp | |
parent | f7191ecdbd3adad32b561db40fac6978df6409fe (diff) | |
download | gcc-acf0174b6f7a3f8fe1e2a27361bbf87dfe454530.zip gcc-acf0174b6f7a3f8fe1e2a27361bbf87dfe454530.tar.gz gcc-acf0174b6f7a3f8fe1e2a27361bbf87dfe454530.tar.bz2 |
target.c: New file.
libgomp/
* target.c: New file.
* Makefile.am (libgomp_la_SOURCES): Add target.c.
* Makefile.in: Regenerated.
* libgomp_g.h (GOMP_task): Add depend argument.
(GOMP_barrier_cancel, GOMP_loop_end_cancel,
GOMP_sections_end_cancel, GOMP_target, GOMP_target_data,
GOMP_target_end_data, GOMP_target_update, GOMP_teams,
GOMP_parallel_loop_static, GOMP_parallel_loop_dynamic,
GOMP_parallel_loop_guided, GOMP_parallel_loop_runtime,
GOMP_parallel, GOMP_cancel, GOMP_cancellation_point,
GOMP_taskgroup_start, GOMP_taskgroup_end,
GOMP_parallel_sections): New prototypes.
* fortran.c (omp_is_initial_device): Add ialias_redirect.
(omp_is_initial_device_): New function.
(ULP, STR1, STR2, ialias_redirect): Removed.
(omp_get_cancellation_, omp_get_proc_bind_, omp_set_default_device_,
omp_set_default_device_8_, omp_get_default_device_,
omp_get_num_devices_, omp_get_num_teams_, omp_get_team_num_): New
functions.
* libgomp.map (GOMP_barrier_cancel, GOMP_loop_end_cancel,
GOMP_sections_end_cancel, GOMP_target, GOMP_target_data,
GOMP_target_end_data, GOMP_target_update, GOMP_teams): Export
@@GOMP_4.0.
(omp_is_initial_device, omp_is_initial_device_, omp_get_cancellation,
omp_get_cancellation_, omp_get_proc_bind, omp_get_proc_bind_,
omp_set_default_device, omp_set_default_device_,
omp_set_default_device_8_, omp_get_default_device,
omp_get_default_device_, omp_get_num_devices, omp_get_num_devices_,
omp_get_num_teams, omp_get_num_teams_, omp_get_team_num,
omp_get_team_num_): Export @@OMP_4.0.
* team.c (struct gomp_thread_start_data): Add place field.
(gomp_thread_start): Clear thr->thread_pool and
thr->task before returning. Use gomp_team_barrier_wait_final
instead of gomp_team_barrier_wait. Initialize thr->place.
(gomp_new_team): Initialize work_shares_to_free, work_share_cancelled,
team_cancelled and task_queued_count fields.
(gomp_free_pool_helper): Clear thr->thread_pool and thr->task
before calling pthread_exit.
(gomp_free_thread): No longer static. Use
gomp_managed_threads_lock instead of gomp_remaining_threads_lock.
(gomp_team_start): Add flags argument. Set
thr->thread_pool->threads_busy to nthreads immediately after creating
new pool. Use gomp_managed_threads_lock instead of
gomp_remaining_threads_lock. Handle OpenMP 4.0 affinity.
(gomp_team_end): Use gomp_managed_threads_lock instead of
gomp_remaining_threads_lock. Use gomp_team_barrier_wait_final instead
of gomp_team_barrier_wait. If team->team_cancelled, call
gomp_fini_work_share on ws chain starting at team->work_shares_to_free
rather than thr->ts.work_share.
(initialize_team): Don't call gomp_sem_init here.
* sections.c (GOMP_parallel_sections_start): Adjust gomp_team_start
caller.
(GOMP_parallel_sections, GOMP_sections_end_cancel): New functions.
* env.c (gomp_global_icv): Add default_device_var, target_data and
bind_var initializers.
(gomp_cpu_affinity, gomp_cpu_affinity_len): Remove.
(gomp_bind_var_list, gomp_bind_var_list_len, gomp_places_list,
gomp_places_list_len): New variables.
(parse_bind_var, parse_one_place, parse_places_var): New functions.
(parse_affinity): Rewritten to construct OMP_PLACES list with unit
sized places.
(gomp_cancel_var): New global variable.
(parse_int): New function.
(handle_omp_display_env): New function.
(initialize_env): Use it. Initialize default_device_var.
Parse OMP_CANCELLATION env var. Use parse_bind_var to parse
OMP_PROC_BIND instead of parse_boolean. Use parse_places_var for
OMP_PLACES parsing. Don't call parse_affinity if OMP_PLACES has
been successfully parsed (and call gomp_init_affinity in that case).
(omp_get_cancellation, omp_get_proc_bind, omp_set_default_device,
omp_get_default_device, omp_get_num_devices, omp_get_num_teams,
omp_get_team_num, omp_is_initial_device): New functions.
* libgomp.h: Include stdlib.h.
(ialias_ulp, ialias_str1, ialias_str2, ialias_redirect, ialias_call):
Define.
(struct target_mem_desc): Forward declare.
(struct gomp_task_icv): Add default_device_var, target_data, bind_var
and thread_limit_var fields.
(gomp_get_num_devices): New prototype.
(gomp_cancel_var): New extern decl.
(struct gomp_team): Add work_shares_to_free, work_share_cancelled,
team_cancelled and task_queued_count fields. Add comments about
task_{,queued_,running_}count.
(gomp_cancel_kind): New enum.
(gomp_work_share_end_cancel): New prototype.
(struct gomp_task): Add next_taskgroup, prev_taskgroup, taskgroup,
copy_ctors_done, dependers, depend_hash, depend_count, num_dependees
and depend fields.
(struct gomp_taskgroup): New type.
(struct gomp_task_depend_entry,
struct gomp_dependers_vec): New types.
(gomp_finish_task): Free depend_hash if non-NULL.
(struct gomp_team_state): Add place_partition_off
and place_partition_len fields.
(gomp_bind_var_list, gomp_bind_var_list_len, gomp_places_list,
gomp_places_list_len): New extern decls.
(struct gomp_thread): Add place field.
(gomp_cpu_affinity, gomp_cpu_affinity_len): Remove.
(gomp_init_thread_affinity): Add place argument.
(gomp_affinity_alloc, gomp_affinity_init_place, gomp_affinity_add_cpus,
gomp_affinity_remove_cpu, gomp_affinity_copy_place,
gomp_affinity_same_place, gomp_affinity_finalize_place_list,
gomp_affinity_init_level, gomp_affinity_print_place): New
prototypes.
(gomp_team_start): Add flags argument.
(gomp_thread_limit_var, gomp_remaining_threads_count,
gomp_remaining_threads_lock): Remove.
(gomp_managed_threads_lock): New variable.
(struct gomp_thread_pool): Add threads_busy field.
(gomp_free_thread): New prototype.
* task.c: Include hashtab.h.
(hash_entry_type): New typedef.
(htab_alloc, htab_free, htab_hash, htab_eq): New inlines.
(gomp_init_task): Clear dependers, depend_hash, depend_count,
copy_ctors_done and taskgroup fields.
(GOMP_task): Add depend argument, handle depend clauses. If
gomp_team_barrier_cancelled or if its taskgroup has been
cancelled, don't queue or start new tasks. Set copy_ctors_done
field if needed. Initialize taskgroup field. If copy_ctors_done
and already cancelled, don't discard the task. If taskgroup is
non-NULL, enqueue the task into taskgroup queue. Increment
num_children field in taskgroup. Increment task_queued_count.
(gomp_task_run_pre, gomp_task_run_post_remove_parent,
gomp_task_run_post_remove_taskgroup): New inline functions.
(gomp_task_run_post_handle_depend_hash,
gomp_task_run_post_handle_dependers,
gomp_task_run_post_handle_depend): New functions.
(GOMP_taskwait): Use them. If more than one new task
has been queued, wake other threads if needed.
(gomp_barrier_handle_tasks): Likewise. If
gomp_team_barrier_cancelled, don't start any new tasks, just free
all tasks.
(GOMP_taskgroup_start, GOMP_taskgroup_end): New functions.
* omp_lib.f90.in
(omp_proc_bind_kind, omp_proc_bind_false,
omp_proc_bind_true, omp_proc_bind_master, omp_proc_bind_close,
omp_proc_bind_spread): New params.
(omp_get_cancellation, omp_get_proc_bind, omp_set_default_device,
omp_get_default_device, omp_get_num_devices, omp_get_num_teams,
omp_get_team_num, omp_is_initial_device): New interfaces.
(omp_get_dynamic, omp_get_nested, omp_in_parallel,
omp_get_max_threads, omp_get_num_procs, omp_get_num_threads,
omp_get_thread_num, omp_get_thread_limit, omp_set_max_active_levels,
omp_get_max_active_levels, omp_get_level, omp_get_ancestor_thread_num,
omp_get_team_size, omp_get_active_level, omp_in_final): Remove
useless use omp_lib_kinds.
* omp.h.in (omp_proc_bind_t): New typedef.
(omp_get_cancellation, omp_get_proc_bind, omp_set_default_device,
omp_get_default_device, omp_get_num_devices, omp_get_num_teams,
omp_get_team_num, omp_is_initial_device): New prototypes.
* loop.c (gomp_parallel_loop_start): Add flags argument, pass it
through to gomp_team_start.
(GOMP_parallel_loop_static_start, GOMP_parallel_loop_dynamic_start,
GOMP_parallel_loop_guided_start, GOMP_parallel_loop_runtime_start):
Adjust gomp_parallel_loop_start callers.
(GOMP_parallel_loop_static, GOMP_parallel_loop_dynamic,
GOMP_parallel_loop_guided, GOMP_parallel_loop_runtime,
GOMP_loop_end_cancel): New functions.
(GOMP_parallel_end): Add ialias_redirect.
* hashtab.h: New file.
* libgomp.texi (Environment Variables): Minor cleanup,
update section refs to OpenMP 4.0rc2.
(OMP_DISPLAY_ENV, GOMP_SPINCOUNT): Document these
environment variables.
* work.c (gomp_work_share_end, gomp_work_share_end_nowait): Set
team->work_shares_to_free to thr->ts.work_share before calling
free_work_share.
(gomp_work_share_end_cancel): New function.
* config/linux/proc.c: Include errno.h.
(gomp_get_cpuset_size, gomp_cpuset_size, gomp_cpusetp): New variables.
(gomp_cpuset_popcount): Add cpusetsize argument, use it instead of
sizeof (cpu_set_t) to determine number of iterations. Fix up check
extern decl. Use CPU_COUNT_S if available, or CPU_COUNT if
gomp_cpuset_size is sizeof (cpu_set_t).
(gomp_init_num_threads): Initialize gomp_cpuset_size,
gomp_get_cpuset_size and gomp_cpusetp here, use gomp_cpusetp instead
of &cpuset and pass gomp_cpuset_size instead of sizeof (cpu_set_t)
to pthread_getaffinity_np. Free and clear gomp_cpusetp if it didn't
contain any logical CPUs.
(get_num_procs): Don't call pthread_getaffinity_np if gomp_cpusetp
is NULL. Use gomp_cpusetp instead of &cpuset and pass
gomp_get_cpuset_size instead of sizeof (cpu_set_t) to
pthread_getaffinity_np. Check gomp_places_list instead of
gomp_cpu_affinity. Adjust gomp_cpuset_popcount caller.
* config/linux/bar.c (gomp_barrier_wait_end,
gomp_barrier_wait_last): Use BAR_* defines.
(gomp_team_barrier_wait_end): Likewise. Clear BAR_CANCELLED
from state where needed. Set work_share_cancelled to 0 on last
thread.
(gomp_team_barrier_wait_final, gomp_team_barrier_wait_cancel_end,
gomp_team_barrier_wait_cancel, gomp_team_barrier_cancel): New
functions.
* config/linux/proc.h (gomp_cpuset_popcount): Add attribute_hidden.
Add cpusetsize argument.
(gomp_cpuset_size, gomp_cpusetp): Declare.
* config/linux/affinity.c: Include errno.h, stdio.h and string.h.
(affinity_counter): Remove.
(CPU_ISSET_S, CPU_ZERO_S, CPU_SET_S, CPU_CLR_S): Define
if CPU_ALLOC_SIZE isn't defined.
(gomp_init_affinity): Rewritten. If gomp_places_list is NULL, try to
silently create OMP_PLACES=threads; if it is non-NULL afterwards,
bind current thread to the first place.
(gomp_init_thread_affinity): Rewritten. Add place argument, just
pthread_setaffinity_np to gomp_places_list[place].
(gomp_affinity_alloc, gomp_affinity_init_place, gomp_affinity_add_cpus,
gomp_affinity_remove_cpu, gomp_affinity_copy_place,
gomp_affinity_same_place, gomp_affinity_finalize_place_list,
gomp_affinity_init_level, gomp_affinity_print_place): New functions.
* config/linux/bar.h (BAR_TASK_PENDING, BAR_WAS_LAST,
BAR_WAITING_FOR_TASK, BAR_INCR, BAR_CANCELLED): Define.
(gomp_barrier_t): Add awaited_final field.
(gomp_barrier_init): Initialize awaited_final field.
(gomp_team_barrier_wait_final, gomp_team_barrier_wait_cancel,
gomp_team_barrier_wait_cancel_end, gomp_team_barrier_cancel): New
prototypes.
(gomp_barrier_wait_start): Preserve BAR_CANCELLED bit. Use BAR_*
defines.
(gomp_barrier_wait_cancel_start, gomp_team_barrier_wait_final_start,
gomp_team_barrier_cancelled): New inline functions.
(gomp_barrier_last_thread,
gomp_team_barrier_set_task_pending,
gomp_team_barrier_clear_task_pending,
gomp_team_barrier_set_waiting_for_tasks,
gomp_team_barrier_waiting_for_tasks,
gomp_team_barrier_done): Use BAR_* defines.
* config/posix/bar.c (gomp_barrier_init): Clear cancellable field.
(gomp_barrier_wait_end): Use BAR_* defines.
(gomp_team_barrier_wait_end): Clear BAR_CANCELLED from state.
Set work_share_cancelled to 0 on last thread, use __atomic_load_n.
Use BAR_* defines.
(gomp_team_barrier_wait_cancel_end, gomp_team_barrier_wait_cancel,
gomp_team_barrier_cancel): New functions.
* config/posix/affinity.c (gomp_init_thread_affinity): Add place
argument.
(gomp_affinity_alloc, gomp_affinity_init_place, gomp_affinity_add_cpus,
gomp_affinity_remove_cpu, gomp_affinity_copy_place,
gomp_affinity_same_place, gomp_affinity_finalize_place_list,
gomp_affinity_init_level, gomp_affinity_print_place): New stubs.
* config/posix/bar.h (BAR_TASK_PENDING, BAR_WAS_LAST,
BAR_WAITING_FOR_TASK, BAR_INCR, BAR_CANCELLED): Define.
(gomp_barrier_t): Add cancellable field.
(gomp_team_barrier_wait_cancel, gomp_team_barrier_wait_cancel_end,
gomp_team_barrier_cancel): New prototypes.
(gomp_barrier_wait_start): Preserve BAR_CANCELLED bit.
(gomp_barrier_wait_cancel_start, gomp_team_barrier_wait_final,
gomp_team_barrier_cancelled): New inline functions.
(gomp_barrier_wait_start, gomp_barrier_last_thread,
gomp_team_barrier_set_task_pending,
gomp_team_barrier_clear_task_pending,
gomp_team_barrier_set_waiting_for_tasks,
gomp_team_barrier_waiting_for_tasks,
gomp_team_barrier_done): Use BAR_* defines.
* barrier.c (GOMP_barrier_cancel): New function.
* omp_lib.h.in (omp_proc_bind_kind, omp_proc_bind_false,
omp_proc_bind_true, omp_proc_bind_master, omp_proc_bind_close,
omp_proc_bind_spread): New params.
(omp_get_cancellation, omp_get_proc_bind, omp_set_default_device,
omp_get_default_device, omp_get_num_devices, omp_get_num_teams,
omp_get_team_num, omp_is_initial_device): New externals.
* parallel.c (GOMP_parallel, GOMP_cancel, GOMP_cancellation_point):
New functions.
(gomp_resolve_num_threads): Adjust for thread_limit now being in
icv->thread_limit_var. Use UINT_MAX instead of ULONG_MAX as
infinity. If not nested, just return minimum of max_num_threads
and icv->thread_limit_var and if thr->thread_pool, set threads_busy
to the returned value. Otherwise, don't update atomically
gomp_remaining_threads_count, but instead thr->thread_pool->threads_busy.
(GOMP_parallel_end): Adjust for thread_limit now being in
icv->thread_limit_var. Use UINT_MAX instead of ULONG_MAX as
infinity. Adjust threads_busy in the pool rather than
gomp_remaining_threads_count. Remember team->nthreads and call
gomp_team_end before adjusting threads_busy, if not nested
afterwards, just set it to 1 non-atomically. Add ialias.
(GOMP_parallel_start): Adjust gomp_team_start caller.
* testsuite/libgomp.c/atomic-14.c: Add parens to make it valid.
* testsuite/libgomp.c/affinity-1.c: New test.
* testsuite/libgomp.c/atomic-15.c: New test.
* testsuite/libgomp.c/atomic-16.c: New test.
* testsuite/libgomp.c/atomic-17.c: New test.
* testsuite/libgomp.c/cancel-for-1.c: New test.
* testsuite/libgomp.c/cancel-for-2.c: New test.
* testsuite/libgomp.c/cancel-parallel-1.c: New test.
* testsuite/libgomp.c/cancel-parallel-2.c: New test.
* testsuite/libgomp.c/cancel-parallel-3.c: New test.
* testsuite/libgomp.c/cancel-sections-1.c: New test.
* testsuite/libgomp.c/cancel-taskgroup-1.c: New test.
* testsuite/libgomp.c/cancel-taskgroup-2.c: New test.
* testsuite/libgomp.c/depend-1.c: New test.
* testsuite/libgomp.c/depend-2.c: New test.
* testsuite/libgomp.c/depend-3.c: New test.
* testsuite/libgomp.c/depend-4.c: New test.
* testsuite/libgomp.c/for-1.c: New test.
* testsuite/libgomp.c/for-1.h: New file.
* testsuite/libgomp.c/for-2.c: New test.
* testsuite/libgomp.c/for-2.h: New file.
* testsuite/libgomp.c/for-3.c: New test.
* testsuite/libgomp.c/pr58392.c: New test.
* testsuite/libgomp.c/simd-1.c: New test.
* testsuite/libgomp.c/simd-2.c: New test.
* testsuite/libgomp.c/simd-3.c: New test.
* testsuite/libgomp.c/simd-4.c: New test.
* testsuite/libgomp.c/simd-5.c: New test.
* testsuite/libgomp.c/simd-6.c: New test.
* testsuite/libgomp.c/target-1.c: New test.
* testsuite/libgomp.c/target-2.c: New test.
* testsuite/libgomp.c/target-3.c: New test.
* testsuite/libgomp.c/target-4.c: New test.
* testsuite/libgomp.c/target-5.c: New test.
* testsuite/libgomp.c/target-6.c: New test.
* testsuite/libgomp.c/target-7.c: New test.
* testsuite/libgomp.c/taskgroup-1.c: New test.
* testsuite/libgomp.c/thread-limit-1.c: New test.
* testsuite/libgomp.c/thread-limit-2.c: New test.
* testsuite/libgomp.c/thread-limit-3.c: New test.
* testsuite/libgomp.c/udr-1.c: New test.
* testsuite/libgomp.c/udr-2.c: New test.
* testsuite/libgomp.c/udr-3.c: New test.
* testsuite/libgomp.c++/affinity-1.C: New test.
* testsuite/libgomp.c++/atomic-10.C: New test.
* testsuite/libgomp.c++/atomic-11.C: New test.
* testsuite/libgomp.c++/atomic-12.C: New test.
* testsuite/libgomp.c++/atomic-13.C: New test.
* testsuite/libgomp.c++/atomic-14.C: New test.
* testsuite/libgomp.c++/atomic-15.C: New test.
* testsuite/libgomp.c++/cancel-for-1.C: New test.
* testsuite/libgomp.c++/cancel-for-2.C: New test.
* testsuite/libgomp.c++/cancel-parallel-1.C: New test.
* testsuite/libgomp.c++/cancel-parallel-2.C: New test.
* testsuite/libgomp.c++/cancel-parallel-3.C: New test.
* testsuite/libgomp.c++/cancel-sections-1.C: New test.
* testsuite/libgomp.c++/cancel-taskgroup-1.C: New test.
* testsuite/libgomp.c++/cancel-taskgroup-2.C: New test.
* testsuite/libgomp.c++/cancel-taskgroup-3.C: New test.
* testsuite/libgomp.c++/cancel-test.h: New file.
* testsuite/libgomp.c++/for-9.C: New test.
* testsuite/libgomp.c++/for-10.C: New test.
* testsuite/libgomp.c++/for-11.C: New test.
* testsuite/libgomp.c++/simd-1.C: New test.
* testsuite/libgomp.c++/simd-2.C: New test.
* testsuite/libgomp.c++/simd-3.C: New test.
* testsuite/libgomp.c++/simd-4.C: New test.
* testsuite/libgomp.c++/simd-5.C: New test.
* testsuite/libgomp.c++/simd-6.C: New test.
* testsuite/libgomp.c++/simd-7.C: New test.
* testsuite/libgomp.c++/simd-8.C: New test.
* testsuite/libgomp.c++/target-1.C: New test.
* testsuite/libgomp.c++/target-2.C: New test.
* testsuite/libgomp.c++/target-2-aux.cc: New file.
* testsuite/libgomp.c++/target-3.C: New test.
* testsuite/libgomp.c++/taskgroup-1.C: New test.
* testsuite/libgomp.c++/udr-1.C: New test.
* testsuite/libgomp.c++/udr-2.C: New test.
* testsuite/libgomp.c++/udr-3.C: New test.
* testsuite/libgomp.c++/udr-4.C: New test.
* testsuite/libgomp.c++/udr-5.C: New test.
* testsuite/libgomp.c++/udr-6.C: New test.
* testsuite/libgomp.c++/udr-7.C: New test.
* testsuite/libgomp.c++/udr-8.C: New test.
* testsuite/libgomp.c++/udr-9.C: New test.
gcc/
* tree-pretty-print.c (dump_omp_clause): Handle OMP_CLAUSE__LOOPTEMP_
and new OpenMP 4.0 clauses, handle UDR OMP_CLAUSE_REDUCTION,
formatting fixes, use pp_colon instead of pp_character (..., ':'),
similarly pp_right_paren.
(dump_generic_node): Handle OMP_DISTRIBUTE, OMP_TEAMS,
OMP_TARGET_DATA, OMP_TARGET, OMP_TARGET_UPDATE, OMP_TASKGROUP,
allow OMP_FOR_INIT to be NULL, handle OMP_ATOMIC_SEQ_CST.
* tree.c (omp_clause_num_ops, omp_clause_code_name): Add OpenMP 4.0
clauses.
(omp_declare_simd_clauses_equal,
omp_remove_redundant_declare_simd_attrs): New functions.
(attribute_value_equal): Use omp_declare_simd_clauses_equal.
(walk_tree_1): Handle new OpenMP 4.0 clauses.
* tree.h (OMP_LOOP_CHECK): Define.
(OMP_FOR_BODY, OMP_FOR_CLAUSES, OMP_FOR_INIT, OMP_FOR_COND,
OMP_FOR_INCR, OMP_FOR_PRE_BODY): Use it.
(OMP_TASKGROUP_BODY, OMP_TEAMS_BODY, OMP_TEAMS_CLAUSES,
OMP_TARGET_DATA_BODY, OMP_TARGET_DATA_CLAUSES, OMP_TARGET_BODY,
OMP_TARGET_CLAUSES, OMP_TARGET_UPDATE_CLAUSES, OMP_CLAUSE_SIZE,
OMP_ATOMIC_SEQ_CST, OMP_CLAUSE_DEPEND_KIND, OMP_CLAUSE_MAP_KIND,
OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION, OMP_CLAUSE_PROC_BIND_KIND,
OMP_CLAUSE_REDUCTION_OMP_ORIG_REF, OMP_CLAUSE_ALIGNED_ALIGNMENT,
OMP_CLAUSE_NUM_TEAMS_EXPR, OMP_CLAUSE_THREAD_LIMIT_EXPR,
OMP_CLAUSE_DEVICE_ID, OMP_CLAUSE_DIST_SCHEDULE_CHUNK_EXPR,
OMP_CLAUSE_SIMDLEN_EXPR): Define.
(OMP_CLAUSE_DECL): Change range up to OMP_CLAUSE__LOOPTEMP_.
(omp_remove_redundant_declare_simd_attrs): New prototype.
* gimple.def (GIMPLE_OMP_TASKGROUP, GIMPLE_OMP_TARGET,
GIMPLE_OMP_TEAMS): New codes.
(GIMPLE_OMP_RETURN): Use GSS_OMP_ATOMIC_STORE instead of GSS_BASE.
* omp-low.c (struct omp_context): Add cancel_label and cancellable
fields.
(target_nesting_level): New variable.
(extract_omp_for_data): Handle GF_OMP_FOR_KIND_DISTRIBUTE and
OMP_CLAUSE_DIST_SCHEDULE. Don't fall back to library implementation
for collapse > 1 static schedule unless ordered.
(get_ws_args_for): Add par_stmt argument. Handle combined loops.
(determine_parallel_type): Adjust get_ws_args_for caller.
(install_var_field): Handle mask & 4 for double indirection.
(scan_sharing_clauses): Ignore shared clause on teams construct.
Handle OMP_CLAUSE__LOOPTEMP_ and new OpenMP 4.0 clauses.
(create_omp_child_function): If inside target or declare target
constructs, set "omp declare target" attribute on the child
function.
(find_combined_for): New function.
(scan_omp_parallel): Handle combined loops.
(scan_omp_target, scan_omp_teams): New functions.
(check_omp_nesting_restrictions): Check new OpenMP 4.0 nesting
restrictions and set ctx->cancellable for cancellable constructs.
(scan_omp_1_stmt): Call check_omp_nesting_restrictions also on
selected builtin calls. Handle GIMPLE_OMP_TASKGROUP,
GIMPLE_OMP_TARGET, GIMPLE_OMP_TEAMS.
(build_omp_barrier): Add lhs argument, return gimple rather than
tree.
(omp_clause_aligned_alignment): New function.
(lower_rec_simd_input_clauses): Only call SET_DECL_VALUE_EXPR
on decls.
(lower_rec_input_clauses): Add FD argument. Ignore shared clauses
on teams constructs. Handle user defined reductions and new
OpenMP 4.0 clauses.
(lower_reduction_clauses): Don't set placeholder to address of ref
if it already has the right type.
(lower_send_clauses): Handle OMP_CLAUSE__LOOPTEMP_.
(expand_parallel_call): Use the new non-_start suffixed builtins,
handle OMP_CLAUSE_PROC_BIND, don't call the outlined function
and GOMP_parallel_end after the call.
(expand_task_call): Handle OMP_CLAUSE_DEPEND.
(expand_omp_for_init_counts): Handle combined loops.
(expand_omp_for_init_vars): Add inner_stmt argument, handle combined
loops.
(expand_omp_for_generic): Likewise. Use GOMP_loop_end_cancel at the
end of cancellable loops.
(expand_omp_for_static_nochunk, expand_omp_for_static_chunk):
Likewise. Handle collapse > 1 loops.
(expand_omp_simd): Handle combined loops.
(expand_omp_for): Add inner_stmt argument, adjust callers of
expand_omp_for* functions, use expand_omp_for_static*chunk even
for collapse > 1 unless ordered.
(expand_omp_sections): Use GOMP_sections_end_cancel at the end
of cancellable sections.
(expand_omp_single): Remove need_barrier variable, just rely on
gimple_omp_return_nowait_p. Adjust build_omp_barrier caller.
(expand_omp_synch): Allow GIMPLE_OMP_TASKGROUP and GIMPLE_OMP_TEAMS.
(expand_omp_atomic_load, expand_omp_atomic_store,
expand_omp_atomic_fetch_op): Handle gimple_omp_atomic_seq_cst_p.
(expand_omp_target): New function.
(expand_omp): Handle combined loops. Handle GIMPLE_OMP_TASKGROUP,
GIMPLE_OMP_TEAMS, GIMPLE_OMP_TARGET.
(build_omp_regions_1): Immediately close region for
GF_OMP_TARGET_KIND_UPDATE.
(maybe_add_implicit_barrier_cancel): New function.
(lower_omp_sections): Adjust lower_rec_input_clauses caller. Handle
cancellation.
(lower_omp_single): Likewise. Add clobber after the barrier.
(lower_omp_taskgroup): New function.
(lower_omp_for): Handle combined loops. Adjust
lower_rec_input_clauses caller. Handle cancellation.
(lower_depend_clauses): New function.
(lower_omp_taskreg): Lower depend clauses. Adjust
lower_rec_input_clauses caller. Add clobber after the call. Handle
cancellation.
(lower_omp_target, lower_omp_teams): New functions.
(lower_omp_1): Handle cancellation. Handle GIMPLE_OMP_TASKGROUP,
GIMPLE_OMP_TARGET, GIMPLE_OMP_TEAMS and GOMP_barrier, GOMP_cancel
and GOMP_cancellation_point calls.
(lower_omp): Fold stmts inside of target region.
(diagnose_sb_1, diagnose_sb_2): Handle GIMPLE_OMP_TASKGROUP,
GIMPLE_OMP_TARGET and GIMPLE_OMP_TEAMS.
* builtin-types.def (DEF_FUNCTION_TYPE_8): Document.
(BT_FN_VOID_OMPFN_PTR_UINT,
BT_FN_VOID_OMPFN_PTR_UINT_LONG_LONG_LONG,
BT_FN_VOID_OMPFN_PTR_UINT_LONG_LONG_LONG_LONG,
BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT): Remove.
(BT_FN_VOID_OMPFN_PTR_UINT_UINT_UINT,
BT_FN_VOID_OMPFN_PTR_UINT_LONG_LONG_LONG_UINT,
BT_FN_VOID_OMPFN_PTR_UINT_LONG_LONG_LONG_LONG_UINT,
BT_FN_BOOL_INT, BT_FN_BOOL_INT_BOOL, BT_FN_VOID_UINT_UINT,
BT_FN_VOID_INT_PTR_SIZE_PTR_PTR_PTR,
BT_FN_VOID_INT_OMPFN_PTR_SIZE_PTR_PTR_PTR,
BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR): New.
* tree-ssa-alias.c (ref_maybe_used_by_call_p_1,
call_may_clobber_ref_p_1): Handle BUILT_IN_GOMP_BARRIER_CANCEL,
BUILT_IN_GOMP_TASKGROUP_END, BUILT_IN_GOMP_LOOP_END_CANCEL,
BUILT_IN_GOMP_SECTIONS_END_CANCEL. Don't handle
BUILT_IN_GOMP_PARALLEL_END.
* gimple-low.c (lower_stmt): Handle GIMPLE_OMP_TASKGROUP,
GIMPLE_OMP_TARGET and GIMPLE_OMP_TEAMS.
* gimple-pretty-print.c (dump_gimple_omp_for): Handle
GF_OMP_FOR_KIND_DISTRIBUTE.
(dump_gimple_omp_target, dump_gimple_omp_teams): New functions.
(dump_gimple_omp_block): Handle GIMPLE_OMP_TASKGROUP.
(dump_gimple_omp_return): Print lhs if it has any.
(dump_gimple_omp_atomic_load, dump_gimple_omp_atomic_store): Handle
gimple_omp_atomic_seq_cst_p.
(pp_gimple_stmt_1): Handle GIMPLE_OMP_TASKGROUP, GIMPLE_OMP_TARGET
and GIMPLE_OMP_TEAMS.
* langhooks.c (lhd_omp_mappable_type): New function.
* tree-vectorizer.c (struct simd_array_to_simduid): Fix up comment.
* langhooks.h (struct lang_hooks_for_types): Add omp_mappable_type
hook.
* gimplify.c (enum gimplify_omp_var_data): Add GOVD_MAP,
GOVD_ALIGNED and GOVD_MAP_TO_ONLY.
(enum omp_region_type): Add ORT_TEAMS, ORT_TARGET_DATA and
ORT_TARGET.
(struct gimplify_omp_ctx): Add combined_loop field.
(gimplify_call_expr, gimplify_modify_expr): Don't call fold_stmt
on stmts inside of target region.
(is_gimple_stmt): Return true for OMP_DISTRIBUTE and OMP_TASKGROUP.
(omp_firstprivatize_variable): Handle GOVD_MAP, GOVD_ALIGNED,
ORT_TARGET and ORT_TARGET_DATA.
(omp_add_variable): Avoid checks on readding var for GOVD_ALIGNED.
Handle GOVD_MAP.
(omp_notice_threadprivate_variable): Complain about threadprivate
variables in target region.
(omp_notice_variable): Complain about vars with non-mappable type
in target region. Handle ORT_TEAMS, ORT_TARGET and ORT_TARGET_DATA.
(omp_check_private): Ignore ORT_TARGET* regions.
(gimplify_scan_omp_clauses, gimplify_adjust_omp_clauses_1,
gimplify_adjust_omp_clauses): Handle new OpenMP 4.0 clauses.
(find_combined_omp_for): New function.
(gimplify_omp_for): Handle gimplification of combined loops.
(gimplify_omp_workshare): Gimplify also OMP_TARGET, OMP_TARGET_DATA,
OMP_TEAMS.
(gimplify_omp_target_update): New function.
(gimplify_omp_atomic): Handle OMP_ATOMIC_SEQ_CST.
(gimplify_expr): Handle OMP_DISTRIBUTE, OMP_TARGET, OMP_TARGET_DATA,
OMP_TARGET_UPDATE, OMP_TEAMS, OMP_TASKGROUP.
(gimplify_body): If fndecl has "omp declare target" attribute, add
implicit ORT_TARGET context around it.
* tree.def (OMP_DISTRIBUTE, OMP_TEAMS, OMP_TARGET_DATA, OMP_TARGET,
OMP_TASKGROUP, OMP_TARGET_UPDATE): New tree codes.
* tree-nested.c (convert_nonlocal_reference_stmt,
convert_local_reference_stmt, convert_gimple_call): Handle
GIMPLE_OMP_TARGET, GIMPLE_OMP_TEAMS and GIMPLE_OMP_TASKGROUP.
* omp-builtins.def (BUILT_IN_GOMP_TASK): Use
BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR
instead of BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT.
(BUILT_IN_GOMP_TARGET, BUILT_IN_GOMP_TARGET_DATA,
BUILT_IN_GOMP_TARGET_END_DATA, BUILT_IN_GOMP_TARGET_UPDATE,
BUILT_IN_GOMP_TEAMS, BUILT_IN_GOMP_BARRIER_CANCEL,
BUILT_IN_GOMP_LOOP_END_CANCEL,
BUILT_IN_GOMP_SECTIONS_END_CANCEL, BUILT_IN_OMP_GET_TEAM_NUM,
BUILT_IN_OMP_GET_NUM_TEAMS, BUILT_IN_GOMP_TASKGROUP_START,
BUILT_IN_GOMP_TASKGROUP_END, BUILT_IN_GOMP_PARALLEL_LOOP_STATIC,
BUILT_IN_GOMP_PARALLEL_LOOP_DYNAMIC,
BUILT_IN_GOMP_PARALLEL_LOOP_GUIDED,
BUILT_IN_GOMP_PARALLEL_LOOP_RUNTIME, BUILT_IN_GOMP_PARALLEL,
BUILT_IN_GOMP_PARALLEL_SECTIONS, BUILT_IN_GOMP_CANCEL,
BUILT_IN_GOMP_CANCELLATION_POINT): New built-ins.
(BUILT_IN_GOMP_PARALLEL_LOOP_STATIC_START,
BUILT_IN_GOMP_PARALLEL_LOOP_DYNAMIC_START,
BUILT_IN_GOMP_PARALLEL_LOOP_GUIDED_START,
BUILT_IN_GOMP_PARALLEL_LOOP_RUNTIME_START,
BUILT_IN_GOMP_PARALLEL_START, BUILT_IN_GOMP_PARALLEL_END,
BUILT_IN_GOMP_PARALLEL_SECTIONS_START): Remove.
* tree-inline.c (remap_gimple_stmt, estimate_num_insns):
Handle GIMPLE_OMP_TARGET, GIMPLE_OMP_TEAMS and GIMPLE_OMP_TASKGROUP.
* gimple.c (gimple_build_omp_taskgroup, gimple_build_omp_target,
gimple_build_omp_teams): New functions.
(walk_gimple_op): Handle GIMPLE_OMP_TARGET, GIMPLE_OMP_TEAMS and
GIMPLE_OMP_TASKGROUP. Walk optional lhs on GIMPLE_OMP_RETURN.
(walk_gimple_stmt, gimple_copy): Handle GIMPLE_OMP_TARGET,
GIMPLE_OMP_TEAMS and GIMPLE_OMP_TASKGROUP.
* gimple.h (enum gf_mask): Add GF_OMP_FOR_KIND_DISTRIBUTE,
GF_OMP_FOR_COMBINED, GF_OMP_FOR_COMBINED_INTO,
GF_OMP_TARGET_KIND_MASK, GF_OMP_TARGET_KIND_REGION,
GF_OMP_TARGET_KIND_DATA, GF_OMP_TARGET_KIND_UPDATE and
GF_OMP_ATOMIC_SEQ_CST.
(gimple_build_omp_taskgroup, gimple_build_omp_target,
gimple_build_omp_teams): New prototypes.
(gimple_has_substatements): Handle GIMPLE_OMP_TARGET,
GIMPLE_OMP_TEAMS and GIMPLE_OMP_TASKGROUP.
(gimple_omp_subcode): Use GIMPLE_OMP_TEAMS instead of
GIMPLE_OMP_SINGLE as end of range.
(gimple_omp_return_set_lhs, gimple_omp_return_lhs,
gimple_omp_return_lhs_ptr, gimple_omp_atomic_seq_cst_p,
gimple_omp_atomic_set_seq_cst, gimple_omp_for_combined_p,
gimple_omp_for_set_combined_p, gimple_omp_for_combined_into_p,
gimple_omp_for_set_combined_into_p, gimple_omp_target_clauses,
gimple_omp_target_clauses_ptr, gimple_omp_target_set_clauses,
gimple_omp_target_kind, gimple_omp_target_set_kind,
gimple_omp_target_child_fn, gimple_omp_target_child_fn_ptr,
gimple_omp_target_set_child_fn, gimple_omp_target_data_arg,
gimple_omp_target_data_arg_ptr, gimple_omp_target_set_data_arg,
gimple_omp_teams_clauses, gimple_omp_teams_clauses_ptr,
gimple_omp_teams_set_clauses): New inlines.
(CASE_GIMPLE_OMP): Add GIMPLE_OMP_TARGET, GIMPLE_OMP_TEAMS
and GIMPLE_OMP_TASKGROUP.
* tree-core.h (enum omp_clause_code): Add new OpenMP 4.0 clause
codes.
(enum omp_clause_depend_kind, enum omp_clause_map_kind,
enum omp_clause_proc_bind_kind): New.
(union omp_clause_subcode): Add depend_kind, map_kind and
proc_bind_kind fields.
* tree-cfg.c (make_edges): Handle GIMPLE_OMP_TARGET,
GIMPLE_OMP_TEAMS and GIMPLE_OMP_TASKGROUP.
* langhooks-def.h (lhd_omp_mappable_type): New prototype.
(LANG_HOOKS_OMP_MAPPABLE_TYPE): Define.
(LANG_HOOKS_FOR_TYPES_INITIALIZER): Add it.
gcc/c-family/
* c-cppbuiltin.c (c_cpp_builtins): Predefine _OPENMP to
201307 instead of 201107.
* c-common.c (DEF_FUNCTION_TYPE_8): Define.
(c_common_attribute_table): Add "omp declare target" and
"omp declare simd" attributes.
(handle_omp_declare_target_attribute,
handle_omp_declare_simd_attribute): New functions.
* c-omp.c: Include c-pragma.h.
(c_finish_omp_taskgroup): New function.
(c_finish_omp_atomic): Add swapped argument, if true,
build the operation first with rhs, lhs arguments and use NOP_EXPR
build_modify_expr.
(c_finish_omp_for): Add code argument, pass it down to make_code.
(c_omp_split_clauses): New function.
(c_split_parallel_clauses): Removed.
(c_omp_declare_simd_clause_cmp, c_omp_declare_simd_clauses_to_numbers,
c_omp_declare_simd_clauses_to_decls): New functions.
* c-common.h (omp_clause_mask): New type.
(OMP_CLAUSE_MASK_1): Define.
(omp_clause_mask::omp_clause_mask, omp_clause_mask::operator &=,
omp_clause_mask::operator |=, omp_clause_mask::operator ~,
omp_clause_mask::operator |, omp_clause_mask::operator &,
omp_clause_mask::operator <<, omp_clause_mask::operator >>,
omp_clause_mask::operator ==): New methods.
(enum c_omp_clause_split): New.
(c_finish_omp_taskgroup): New prototype.
(c_finish_omp_atomic): Add swapped argument.
(c_finish_omp_for): Add code argument.
(c_omp_split_clauses): New prototype.
(c_split_parallel_clauses): Removed.
(c_omp_declare_simd_clauses_to_numbers,
c_omp_declare_simd_clauses_to_decls): New prototypes.
* c-pragma.c (omp_pragmas): Add new OpenMP 4.0 constructs.
* c-pragma.h (enum pragma_kind): Add PRAGMA_OMP_CANCEL,
PRAGMA_OMP_CANCELLATION_POINT, PRAGMA_OMP_DECLARE_REDUCTION,
PRAGMA_OMP_DISTRIBUTE, PRAGMA_OMP_END_DECLARE_TARGET, PRAGMA_OMP_SIMD,
PRAGMA_OMP_TARGET, PRAGMA_OMP_TASKGROUP and PRAGMA_OMP_TEAMS.
Remove PRAGMA_OMP_PARALLEL_FOR and PRAGMA_OMP_PARALLEL_SECTIONS.
(enum pragma_omp_clause): Add PRAGMA_OMP_CLAUSE_ALIGNED,
PRAGMA_OMP_CLAUSE_DEPEND, PRAGMA_OMP_CLAUSE_DEVICE,
PRAGMA_OMP_CLAUSE_DIST_SCHEDULE, PRAGMA_OMP_CLAUSE_FOR,
PRAGMA_OMP_CLAUSE_FROM, PRAGMA_OMP_CLAUSE_INBRANCH,
PRAGMA_OMP_CLAUSE_LINEAR, PRAGMA_OMP_CLAUSE_MAP,
PRAGMA_OMP_CLAUSE_NOTINBRANCH, PRAGMA_OMP_CLAUSE_NUM_TEAMS,
PRAGMA_OMP_CLAUSE_PARALLEL, PRAGMA_OMP_CLAUSE_PROC_BIND,
PRAGMA_OMP_CLAUSE_SAFELEN, PRAGMA_OMP_CLAUSE_SECTIONS,
PRAGMA_OMP_CLAUSE_SIMDLEN, PRAGMA_OMP_CLAUSE_TASKGROUP,
PRAGMA_OMP_CLAUSE_THREAD_LIMIT, PRAGMA_OMP_CLAUSE_TO and
PRAGMA_OMP_CLAUSE_UNIFORM.
gcc/ada/
* gcc-interface/utils.c (DEF_FUNCTION_TYPE_8): Define.
gcc/fortran/
* trans-openmp.c (gfc_omp_clause_default_ctor,
gfc_omp_clause_dtor): Return NULL for OMP_CLAUSE_REDUCTION.
* f95-lang.c (ATTR_NULL, DEF_FUNCTION_TYPE_8): Define.
* types.def (DEF_FUNCTION_TYPE_8): Document.
(BT_FN_VOID_OMPFN_PTR_UINT,
BT_FN_VOID_OMPFN_PTR_UINT_LONG_LONG_LONG,
BT_FN_VOID_OMPFN_PTR_UINT_LONG_LONG_LONG_LONG,
BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT): Remove.
(BT_FN_VOID_OMPFN_PTR_UINT_UINT_UINT,
BT_FN_VOID_OMPFN_PTR_UINT_LONG_LONG_LONG_UINT,
BT_FN_VOID_OMPFN_PTR_UINT_LONG_LONG_LONG_LONG_UINT,
BT_FN_BOOL_INT, BT_FN_BOOL_INT_BOOL, BT_FN_VOID_UINT_UINT,
BT_FN_VOID_INT_PTR_SIZE_PTR_PTR_PTR,
BT_FN_VOID_INT_OMPFN_PTR_SIZE_PTR_PTR_PTR,
BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR): New.
gcc/lto/
* lto-lang.c (DEF_FUNCTION_TYPE_8): Define.
gcc/c/
* c-lang.h (current_omp_declare_target_attribute): New extern
decl.
* c-parser.c: Include c-lang.h.
(struct c_parser): Change tokens to c_token *.
Add tokens_buf field. Change tokens_avail type to unsigned int.
(c_parser_consume_token): If parser->tokens isn't
&parser->tokens_buf[0], increment parser->tokens.
(c_parser_consume_pragma): Likewise.
(enum pragma_context): Add pragma_struct and pragma_param.
(c_parser_external_declaration): Adjust
c_parser_declaration_or_fndef caller.
(c_parser_declaration_or_fndef): Add omp_declare_simd_clauses
argument, if it is non-vNULL vector, call c_finish_omp_declare_simd.
Adjust recursive call.
(c_parser_struct_or_union_specifier): Use pragma_struct instead
of pragma_external.
(c_parser_parameter_declaration): Use pragma_param instead of
pragma_external.
(c_parser_compound_statement_nostart, c_parser_label,
c_parser_for_statement): Adjust
c_parser_declaration_or_fndef callers.
(c_parser_expr_no_commas): Add omp_atomic_lhs argument, pass
it through to c_parser_conditional_expression.
(c_parser_conditional_expression): Add omp_atomic_lhs argument,
pass it through to c_parser_binary_expression. Adjust recursive
call.
(c_parser_binary_expression): Remove prec argument, add
omp_atomic_lhs argument instead. Always start from PREC_NONE, if
omp_atomic_lhs is non-NULL and one of the arguments of toplevel
binop matches it, use build2 instead of parser_build_binary_op.
(c_parser_pragma): Handle PRAGMA_OMP_CANCEL,
PRAGMA_OMP_CANCELLATION_POINT, PRAGMA_OMP_TARGET,
PRAGMA_OMP_END_DECLARE_TARGET, PRAGMA_OMP_DECLARE_REDUCTION.
Handle pragma_struct and pragma_param the same as pragma_external.
(c_parser_omp_clause_name): Parse new OpenMP 4.0 clause names.
(c_parser_omp_variable_list): Parse array sections for
OMP_CLAUSE_{DEPEND,MAP,TO,FROM} clauses.
(c_parser_omp_clause_collapse): Fully fold collapse expression.
(c_parser_omp_clause_reduction): Handle user defined reductions.
(c_parser_omp_clause_branch, c_parser_omp_clause_cancelkind,
c_parser_omp_clause_num_teams, c_parser_omp_clause_thread_limit,
c_parser_omp_clause_aligned, c_parser_omp_clause_linear,
c_parser_omp_clause_safelen, c_parser_omp_clause_simdlen,
c_parser_omp_clause_depend, c_parser_omp_clause_map,
c_parser_omp_clause_device, c_parser_omp_clause_dist_schedule,
c_parser_omp_clause_proc_bind, c_parser_omp_clause_to,
c_parser_omp_clause_from, c_parser_omp_clause_uniform): New functions.
(c_parser_omp_all_clauses): Add finish_p argument. Don't call
c_finish_omp_clauses if it is false. Handle new OpenMP 4.0 clauses.
(c_parser_omp_atomic): Parse seq_cst clause, pass true if it is
present to c_finish_omp_atomic. Handle OpenMP 4.0 atomic forms.
(c_parser_omp_for_loop): Add CODE argument, pass it through
to c_finish_omp_for. Change last argument to cclauses,
and adjust uses to grab parallel clauses from the array of all
the split clauses. Adjust c_parser_binary_expression,
c_parser_declaration_or_fndef and c_finish_omp_for callers.
(omp_split_clauses): New function.
(c_parser_omp_simd): New function.
(c_parser_omp_for): Add p_name, mask and cclauses arguments.
Allow the function to be called also when parsing combined constructs,
and call c_parser_omp_simd when parsing for simd.
(c_parser_omp_sections_scope): If section-sequence doesn't start with
#pragma omp section, require exactly one structured-block instead of
sequence of statements.
(c_parser_omp_sections): Add p_name, mask and cclauses arguments.
Allow the function to be called also when parsing combined constructs.
(c_parser_omp_parallel): Add p_name, mask and cclauses arguments.
Allow the function to be called also when parsing combined
constructs.
(c_parser_omp_taskgroup, c_parser_omp_cancel,
c_parser_omp_cancellation_point, c_parser_omp_distribute,
c_parser_omp_teams, c_parser_omp_target_data,
c_parser_omp_target_update, c_parser_omp_target,
c_parser_omp_declare_simd, c_finish_omp_declare_simd,
c_parser_omp_declare_target, c_parser_omp_end_declare_target,
c_parser_omp_declare_reduction, c_parser_omp_declare): New functions.
(c_parser_omp_construct): Add p_name and mask vars. Handle
PRAGMA_OMP_DISTRIBUTE, PRAGMA_OMP_SIMD, PRAGMA_OMP_TASKGROUP,
PRAGMA_OMP_TEAMS. Adjust c_parser_omp_for, c_parser_omp_parallel
and c_parser_omp_sections callers.
(c_parse_file): Initialize tparser.tokens and the_parser->tokens here.
(OMP_FOR_CLAUSE_MASK, OMP_SECTIONS_CLAUSE_MASK,
OMP_SINGLE_CLAUSE_MASK): Use OMP_CLAUSE_MASK_1 instead of 1.
(OMP_PARALLEL_CLAUSE_MASK): Likewise. Add OMP_CLAUSE_PROC_BIND.
(OMP_TASK_CLAUSE_MASK): Use OMP_CLAUSE_MASK_1 instead of 1. Add
OMP_CLAUSE_DEPEND.
(OMP_SIMD_CLAUSE_MASK, OMP_CANCEL_CLAUSE_MASK,
OMP_CANCELLATION_POINT_CLAUSE_MASK, OMP_DISTRIBUTE_CLAUSE_MASK,
OMP_TEAMS_CLAUSE_MASK, OMP_TARGET_DATA_CLAUSE_MASK,
OMP_TARGET_UPDATE_CLAUSE_MASK, OMP_TARGET_CLAUSE_MASK,
OMP_DECLARE_SIMD_CLAUSE_MASK): Define.
* c-typeck.c: Include tree-inline.h.
(c_finish_omp_cancel, c_finish_omp_cancellation_point,
handle_omp_array_sections_1, handle_omp_array_sections,
c_clone_omp_udr, c_find_omp_placeholder_r): New functions.
(c_finish_omp_clauses): Handle new OpenMP 4.0 clauses and
user defined reductions.
(c_tree_equal): New function.
* c-tree.h (temp_store_parm_decls, temp_pop_parm_decls,
c_finish_omp_cancel, c_finish_omp_cancellation_point, c_tree_equal,
c_omp_reduction_id, c_omp_reduction_decl, c_omp_reduction_lookup,
c_check_omp_declare_reduction_r): New prototypes.
* c-decl.c (current_omp_declare_target_attribute): New variable.
(c_decl_attributes): New function.
(start_decl, start_function): Use it instead of decl_attributes.
(temp_store_parm_decls, temp_pop_parm_decls, c_omp_reduction_id,
c_omp_reduction_decl, c_omp_reduction_lookup,
c_check_omp_declare_reduction_r): New functions.
gcc/cp/
* decl.c (duplicate_decls): Error out for redeclaration of UDRs.
(declare_simd_adjust_this): New function.
(grokfndecl): If "omp declare simd" attribute is present,
call declare_simd_adjust_this if needed and
c_omp_declare_simd_clauses_to_numbers.
* cp-array-notation.c (expand_array_notation_exprs): Handle
OMP_TASKGROUP.
* cp-gimplify.c (cp_gimplify_expr): Handle OMP_SIMD and
OMP_DISTRIBUTE. Handle is_invisiref_parm decls in
OMP_CLAUSE_REDUCTION.
(cp_genericize_r): Handle OMP_SIMD and OMP_DISTRIBUTE like
OMP_FOR.
(cxx_omp_privatize_by_reference): Return true for
is_invisiref_parm decls.
(cxx_omp_finish_clause): Adjust cxx_omp_create_clause_info
caller.
* pt.c (apply_late_template_attributes): For "omp declare simd"
attribute call tsubst_omp_clauses,
c_omp_declare_simd_clauses_to_decls, finish_omp_clauses
and c_omp_declare_simd_clauses_to_numbers.
(instantiate_class_template_1): Call cp_check_omp_declare_reduction
for UDRs.
(tsubst_decl): Handle UDRs.
(tsubst_omp_clauses): Add declare_simd argument, if true don't
call finish_omp_clauses. Handle new OpenMP 4.0 clauses.
Handle non-NULL OMP_CLAUSE_REDUCTION_PLACEHOLDER on
OMP_CLAUSE_REDUCTION.
(tsubst_expr): For UDRs call pushdecl and
cp_check_omp_declare_reduction. Adjust tsubst_omp_clauses
callers. Handle OMP_SIMD, OMP_DISTRIBUTE, OMP_TEAMS,
OMP_TARGET_DATA, OMP_TARGET_UPDATE, OMP_TARGET, OMP_TASKGROUP.
Adjust finish_omp_atomic caller.
(tsubst_omp_udr): New function.
(instantiate_decl): For UDRs at block scope, don't call
start_preparsed_function/finish_function. Call tsubst_omp_udr.
* semantics.c (cxx_omp_create_clause_info): Add need_dtor argument,
use it instead of need_default_ctor || need_copy_ctor.
(struct cp_check_omp_declare_reduction_data): New type.
(handle_omp_array_sections_1, handle_omp_array_sections,
omp_reduction_id, omp_reduction_lookup,
cp_remove_omp_priv_cleanup_stmt, cp_check_omp_declare_reduction_r,
cp_check_omp_declare_reduction, clone_omp_udr,
find_omp_placeholder_r, finish_omp_reduction_clause): New functions.
(finish_omp_clauses): Handle new OpenMP 4.0 clauses and user defined
reductions.
(finish_omp_for): Add CODE argument, use it instead of hardcoded
OMP_FOR. Adjust c_finish_omp_for caller.
(finish_omp_atomic): Add seq_cst argument, adjust
c_finish_omp_atomic callers, handle seq_cst and new OpenMP 4.0
atomic variants.
(finish_omp_cancel, finish_omp_cancellation_point): New functions.
* decl2.c (mark_used): Force immediate instantiation of
DECL_OMP_DECLARE_REDUCTION_P decls.
(is_late_template_attribute): Return true for "omp declare simd"
attribute.
(cp_omp_mappable_type): New function.
(cplus_decl_attributes): Add implicit "omp declare target" attribute
if requested.
* parser.c (cp_debug_parser): Print
parser->colon_doesnt_start_class_def_p.
(cp_ensure_no_omp_declare_simd, cp_finalize_omp_declare_simd): New
functions.
(enum pragma_context): Add pragma_member and pragma_objc_icode.
(cp_parser_binary_expression): Handle no_toplevel_fold_p
even for binary operations other than comparison.
(cp_parser_linkage_specification): Call
cp_ensure_no_omp_declare_simd if needed.
(cp_parser_namespace_definition): Likewise.
(cp_parser_init_declarator): Call cp_finalize_omp_declare_simd.
(cp_parser_direct_declarator): Pass declarator to
cp_parser_late_return_type_opt.
(cp_parser_late_return_type_opt): Add declarator argument,
call cp_parser_late_parsing_omp_declare_simd for declare simd.
(cp_parser_class_specifier_1): Call cp_ensure_no_omp_declare_simd.
Parse UDRs before all other methods.
(cp_parser_member_specification_opt): Use pragma_member instead of
pragma_external.
(cp_parser_member_declaration): Call cp_finalize_omp_declare_simd.
(cp_parser_function_definition_from_specifiers_and_declarator,
cp_parser_save_member_function_body): Likewise.
(cp_parser_late_parsing_for_member): Handle UDRs specially.
(cp_parser_next_token_starts_class_definition_p): Don't allow
CPP_COLON if colon_doesnt_start_class_def_p flag is true.
(cp_parser_objc_interstitial_code): Use pragma_objc_icode
instead of pragma_external.
(cp_parser_omp_clause_name): Parse new OpenMP 4.0 clause names.
(cp_parser_omp_var_list_no_open): Parse array sections for
OMP_CLAUSE_{DEPEND,MAP,TO,FROM} clauses. Add COLON argument,
if non-NULL, allow parsing to end with a colon rather than close
paren.
(cp_parser_omp_var_list): Adjust cp_parser_omp_var_list_no_open
caller.
(cp_parser_omp_clause_reduction): Handle user defined reductions.
(cp_parser_omp_clause_branch, cp_parser_omp_clause_cancelkind,
cp_parser_omp_clause_num_teams, cp_parser_omp_clause_thread_limit,
cp_parser_omp_clause_aligned, cp_parser_omp_clause_linear,
cp_parser_omp_clause_safelen, cp_parser_omp_clause_simdlen,
cp_parser_omp_clause_depend, cp_parser_omp_clause_map,
cp_parser_omp_clause_device, cp_parser_omp_clause_dist_schedule,
cp_parser_omp_clause_proc_bind, cp_parser_omp_clause_to,
cp_parser_omp_clause_from, cp_parser_omp_clause_uniform): New
functions.
(cp_parser_omp_all_clauses): Add finish_p argument. Don't call
finish_omp_clauses if it is false. Handle new OpenMP 4.0 clauses.
(cp_parser_omp_atomic): Parse seq_cst clause, pass
true if it is present to finish_omp_atomic. Handle new OpenMP 4.0
atomic forms.
(cp_parser_omp_for_loop): Add CODE argument, pass it through
to finish_omp_for. Change last argument to cclauses,
and adjust uses to grab parallel clauses from the array of all
the split clauses.
(cp_omp_split_clauses): New function.
(cp_parser_omp_simd): New function.
(cp_parser_omp_for): Add p_name, mask and cclauses arguments.
Allow the function to be called also when parsing combined constructs,
and call cp_parser_omp_simd when parsing for simd.
(cp_parser_omp_sections_scope): If section-sequence doesn't start with
#pragma omp section, require exactly one structured-block instead of
sequence of statements.
(cp_parser_omp_sections): Add p_name, mask and cclauses arguments.
Allow the function to be called also when parsing combined constructs.
(cp_parser_omp_parallel): Add p_name, mask and cclauses arguments.
Allow the function to be called also when parsing combined
constructs.
(cp_parser_omp_taskgroup, cp_parser_omp_cancel,
cp_parser_omp_cancellation_point, cp_parser_omp_distribute,
cp_parser_omp_teams, cp_parser_omp_target_data,
cp_parser_omp_target_update, cp_parser_omp_target,
cp_parser_omp_declare_simd, cp_parser_late_parsing_omp_declare_simd,
cp_parser_omp_declare_target, cp_parser_omp_end_declare_target,
cp_parser_omp_declare_reduction_exprs, cp_parser_omp_declare_reduction,
cp_parser_omp_declare): New functions.
(cp_parser_omp_construct): Add p_name and mask vars. Handle
PRAGMA_OMP_DISTRIBUTE, PRAGMA_OMP_SIMD, PRAGMA_OMP_TASKGROUP,
PRAGMA_OMP_TEAMS. Adjust cp_parser_omp_for, cp_parser_omp_parallel
and cp_parser_omp_sections callers.
(cp_parser_pragma): Handle PRAGMA_OMP_CANCEL,
PRAGMA_OMP_CANCELLATION_POINT, PRAGMA_OMP_DECLARE_REDUCTION,
PRAGMA_OMP_DISTRIBUTE, PRAGMA_OMP_SIMD, PRAGMA_OMP_TASKGROUP,
PRAGMA_OMP_TEAMS, PRAGMA_OMP_TARGET, PRAGMA_OMP_END_DECLARE_TARGET.
Handle pragma_member and pragma_objc_icode like pragma_external.
(OMP_FOR_CLAUSE_MASK, OMP_SECTIONS_CLAUSE_MASK,
OMP_SINGLE_CLAUSE_MASK): Use OMP_CLAUSE_MASK_1 instead of 1.
(OMP_PARALLEL_CLAUSE_MASK): Likewise. Add OMP_CLAUSE_PROC_BIND.
(OMP_TASK_CLAUSE_MASK): Use OMP_CLAUSE_MASK_1 instead of 1. Add
OMP_CLAUSE_DEPEND.
(OMP_SIMD_CLAUSE_MASK, OMP_CANCEL_CLAUSE_MASK,
OMP_CANCELLATION_POINT_CLAUSE_MASK, OMP_DISTRIBUTE_CLAUSE_MASK,
OMP_TEAMS_CLAUSE_MASK, OMP_TARGET_DATA_CLAUSE_MASK,
OMP_TARGET_UPDATE_CLAUSE_MASK, OMP_TARGET_CLAUSE_MASK,
OMP_DECLARE_SIMD_CLAUSE_MASK): Define.
* parser.h (struct cp_omp_declare_simd_data): New type.
(struct cp_parser): Add colon_doesnt_start_class_def_p and
omp_declare_simd fields.
* cp-objcp-common.h (LANG_HOOKS_OMP_MAPPABLE_TYPE): Define.
* cp-tree.h (struct lang_decl_fn): Add omp_declare_reduction_p
bit.
(DECL_OMP_DECLARE_REDUCTION_P): Define.
(OMP_FOR_GIMPLIFYING_P): Use OMP_LOOP_CHECK macro.
(struct saved_scope): Add omp_declare_target_attribute field.
(cp_omp_mappable_type, omp_reduction_id,
cp_remove_omp_priv_cleanup_stmt, cp_check_omp_declare_reduction,
finish_omp_cancel, finish_omp_cancellation_point): New prototypes.
(finish_omp_for): Add CODE argument.
(finish_omp_atomic): Add seq_cst argument.
(cxx_omp_create_clause_info): Add need_dtor argument.
gcc/testsuite/
* c-c++-common/gomp/atomic-15.c: Adjust for C diagnostics.
Remove error test that is now valid in OpenMP 4.0.
* c-c++-common/gomp/atomic-16.c: New test.
* c-c++-common/gomp/cancel-1.c: New test.
* c-c++-common/gomp/depend-1.c: New test.
* c-c++-common/gomp/depend-2.c: New test.
* c-c++-common/gomp/map-1.c: New test.
* c-c++-common/gomp/pr58472.c: New test.
* c-c++-common/gomp/sections1.c: New test.
* c-c++-common/gomp/simd1.c: New test.
* c-c++-common/gomp/simd2.c: New test.
* c-c++-common/gomp/simd3.c: New test.
* c-c++-common/gomp/simd4.c: New test.
* c-c++-common/gomp/simd5.c: New test.
* c-c++-common/gomp/single1.c: New test.
* g++.dg/gomp/block-0.C: Adjust for stricter #pragma omp sections
parser.
* g++.dg/gomp/block-3.C: Likewise.
* g++.dg/gomp/clause-3.C: Adjust error messages.
* g++.dg/gomp/declare-simd-1.C: New test.
* g++.dg/gomp/declare-simd-2.C: New test.
* g++.dg/gomp/depend-1.C: New test.
* g++.dg/gomp/depend-2.C: New test.
* g++.dg/gomp/target-1.C: New test.
* g++.dg/gomp/target-2.C: New test.
* g++.dg/gomp/taskgroup-1.C: New test.
* g++.dg/gomp/teams-1.C: New test.
* g++.dg/gomp/udr-1.C: New test.
* g++.dg/gomp/udr-2.C: New test.
* g++.dg/gomp/udr-3.C: New test.
* g++.dg/gomp/udr-4.C: New test.
* g++.dg/gomp/udr-5.C: New test.
* g++.dg/gomp/udr-6.C: New test.
* gcc.dg/autopar/outer-1.c: Expect 4 instead of 5 loopfn matches.
* gcc.dg/autopar/outer-2.c: Likewise.
* gcc.dg/autopar/outer-3.c: Likewise.
* gcc.dg/autopar/outer-4.c: Likewise.
* gcc.dg/autopar/outer-5.c: Likewise.
* gcc.dg/autopar/outer-6.c: Likewise.
* gcc.dg/autopar/parallelization-1.c: Likewise.
* gcc.dg/gomp/block-3.c: Adjust for stricter #pragma omp sections
parser.
* gcc.dg/gomp/clause-1.c: Adjust error messages.
* gcc.dg/gomp/combined-1.c: Look for GOMP_parallel_loop_runtime
instead of GOMP_parallel_loop_runtime_start.
* gcc.dg/gomp/declare-simd-1.c: New test.
* gcc.dg/gomp/declare-simd-2.c: New test.
* gcc.dg/gomp/nesting-1.c: Adjust for stricter #pragma omp sections
parser. Add further #pragma omp sections nesting tests.
* gcc.dg/gomp/target-1.c: New test.
* gcc.dg/gomp/target-2.c: New test.
* gcc.dg/gomp/taskgroup-1.c: New test.
* gcc.dg/gomp/teams-1.c: New test.
* gcc.dg/gomp/udr-1.c: New test.
* gcc.dg/gomp/udr-2.c: New test.
* gcc.dg/gomp/udr-3.c: New test.
* gcc.dg/gomp/udr-4.c: New test.
* gfortran.dg/gomp/appendix-a/a.35.5.f90: Add dg-error.
Co-Authored-By: Richard Henderson <rth@redhat.com>
Co-Authored-By: Tobias Burnus <burnus@net-b.de>
From-SVN: r203408
Diffstat (limited to 'libgomp')
114 files changed, 10414 insertions, 407 deletions
diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index 7646e5c..557b80a 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,367 @@ +2013-10-11 Jakub Jelinek <jakub@redhat.com> + Tobias Burnus <burnus@net-b.de> + Richard Henderson <rth@redhat.com> + + * target.c: New file. + * Makefile.am (libgomp_la_SOURCES): Add target.c. + * Makefile.in: Regenerated. + * libgomp_g.h (GOMP_task): Add depend argument. + (GOMP_barrier_cancel, GOMP_loop_end_cancel, + GOMP_sections_end_cancel, GOMP_target, GOMP_target_data, + GOMP_target_end_data, GOMP_target_update, GOMP_teams, + GOMP_parallel_loop_static, GOMP_parallel_loop_dynamic, + GOMP_parallel_loop_guided, GOMP_parallel_loop_runtime, + GOMP_parallel, GOMP_cancel, GOMP_cancellation_point, + GOMP_taskgroup_start, GOMP_taskgroup_end, + GOMP_parallel_sections): New prototypes. + * fortran.c (omp_is_initial_device): Add ialias_redirect. + (omp_is_initial_device_): New function. + (ULP, STR1, STR2, ialias_redirect): Removed. + (omp_get_cancellation_, omp_get_proc_bind_, omp_set_default_device_, + omp_set_default_device_8_, omp_get_default_device_, + omp_get_num_devices_, omp_get_num_teams_, omp_get_team_num_): New + functions. + * libgomp.map (GOMP_barrier_cancel, GOMP_loop_end_cancel, + GOMP_sections_end_cancel, GOMP_target, GOMP_target_data, + GOMP_target_end_data, GOMP_target_update, GOMP_teams): Export + @@GOMP_4.0. + (omp_is_initial_device, omp_is_initial_device_, omp_get_cancellation, + omp_get_cancellation_, omp_get_proc_bind, omp_get_proc_bind_, + omp_set_default_device, omp_set_default_device_, + omp_set_default_device_8_, omp_get_default_device, + omp_get_default_device_, omp_get_num_devices, omp_get_num_devices_, + omp_get_num_teams, omp_get_num_teams_, omp_get_team_num, + omp_get_team_num_): Export @@OMP_4.0. + * team.c (struct gomp_thread_start_data): Add place field. + (gomp_thread_start): Clear thr->thread_pool and + thr->task before returning. 
Use gomp_team_barrier_wait_final + instead of gomp_team_barrier_wait. Initialize thr->place. + (gomp_new_team): Initialize work_shares_to_free, work_share_cancelled, + team_cancelled and task_queued_count fields. + (gomp_free_pool_helper): Clear thr->thread_pool and thr->task + before calling pthread_exit. + (gomp_free_thread): No longer static. Use + gomp_managed_threads_lock instead of gomp_remaining_threads_lock. + (gomp_team_start): Add flags argument. Set + thr->thread_pool->threads_busy to nthreads immediately after creating + new pool. Use gomp_managed_threads_lock instead of + gomp_remaining_threads_lock. Handle OpenMP 4.0 affinity. + (gomp_team_end): Use gomp_managed_threads_lock instead of + gomp_remaining_threads_lock. Use gomp_team_barrier_wait_final instead + of gomp_team_barrier_wait. If team->team_cancelled, call + gomp_fini_work_share on ws chain starting at team->work_shares_to_free + rather than thr->ts.work_share. + (initialize_team): Don't call gomp_sem_init here. + * sections.c (GOMP_parallel_sections_start): Adjust gomp_team_start + caller. + (GOMP_parallel_sections, GOMP_sections_end_cancel): New functions. + * env.c (gomp_global_icv): Add default_device_var, target_data and + bind_var initializers. + (gomp_cpu_affinity, gomp_cpu_affinity_len): Remove. + (gomp_bind_var_list, gomp_bind_var_list_len, gomp_places_list, + gomp_places_list_len): New variables. + (parse_bind_var, parse_one_place, parse_places_var): New functions. + (parse_affinity): Rewritten to construct OMP_PLACES list with unit + sized places. + (gomp_cancel_var): New global variable. + (parse_int): New function. + (handle_omp_display_env): New function. + (initialize_env): Use it. Initialize default_device_var. + Parse OMP_CANCELLATION env var. Use parse_bind_var to parse + OMP_PROC_BIND instead of parse_boolean. Use parse_places_var for + OMP_PLACES parsing. Don't call parse_affinity if OMP_PLACES has + been successfully parsed (and call gomp_init_affinity in that case). 
+ (omp_get_cancellation, omp_get_proc_bind, omp_set_default_device, + omp_get_default_device, omp_get_num_devices, omp_get_num_teams, + omp_get_team_num, omp_is_initial_device): New functions. + * libgomp.h: Include stdlib.h. + (ialias_ulp, ialias_str1, ialias_str2, ialias_redirect, ialias_call): + Define. + (struct target_mem_desc): Forward declare. + (struct gomp_task_icv): Add default_device_var, target_data, bind_var + and thread_limit_var fields. + (gomp_get_num_devices): New prototype. + (gomp_cancel_var): New extern decl. + (struct gomp_team): Add work_shares_to_free, work_share_cancelled, + team_cancelled and task_queued_count fields. Add comments about + task_{,queued_,running_}count. + (gomp_cancel_kind): New enum. + (gomp_work_share_end_cancel): New prototype. + (struct gomp_task): Add next_taskgroup, prev_taskgroup, taskgroup, + copy_ctors_done, dependers, depend_hash, depend_count, num_dependees + and depend fields. + (struct gomp_taskgroup): New type. + (struct gomp_task_depend_entry, + struct gomp_dependers_vec): New types. + (gomp_finish_task): Free depend_hash if non-NULL. + (struct gomp_team_state): Add place_partition_off + and place_partition_len fields. + (gomp_bind_var_list, gomp_bind_var_list_len, gomp_places_list, + gomp_places_list_len): New extern decls. + (struct gomp_thread): Add place field. + (gomp_cpu_affinity, gomp_cpu_affinity_len): Remove. + (gomp_init_thread_affinity): Add place argument. + (gomp_affinity_alloc, gomp_affinity_init_place, gomp_affinity_add_cpus, + gomp_affinity_remove_cpu, gomp_affinity_copy_place, + gomp_affinity_same_place, gomp_affinity_finalize_place_list, + gomp_affinity_init_level, gomp_affinity_print_place): New + prototypes. + (gomp_team_start): Add flags argument. + (gomp_thread_limit_var, gomp_remaining_threads_count, + gomp_remaining_threads_lock): Remove. + (gomp_managed_threads_lock): New variable. + (struct gomp_thread_pool): Add threads_busy field. + (gomp_free_thread): New prototype. 
+ * task.c: Include hashtab.h. + (hash_entry_type): New typedef. + (htab_alloc, htab_free, htab_hash, htab_eq): New inlines. + (gomp_init_task): Clear dependers, depend_hash, depend_count, + copy_ctors_done and taskgroup fields. + (GOMP_task): Add depend argument, handle depend clauses. If + gomp_team_barrier_cancelled or if its taskgroup has been + cancelled, don't queue or start new tasks. Set copy_ctors_done + field if needed. Initialize taskgroup field. If copy_ctors_done + and already cancelled, don't discard the task. If taskgroup is + non-NULL, enqueue the task into taskgroup queue. Increment + num_children field in taskgroup. Increment task_queued_count. + (gomp_task_run_pre, gomp_task_run_post_remove_parent, + gomp_task_run_post_remove_taskgroup): New inline functions. + (gomp_task_run_post_handle_depend_hash, + gomp_task_run_post_handle_dependers, + gomp_task_run_post_handle_depend): New functions. + (GOMP_taskwait): Use them. If more than one new task + has been queued, wake other threads if needed. + (gomp_barrier_handle_tasks): Likewise. If + gomp_team_barrier_cancelled, don't start any new tasks, just free + all tasks. + (GOMP_taskgroup_start, GOMP_taskgroup_end): New functions. + * omp_lib.f90.in + (omp_proc_bind_kind, omp_proc_bind_false, + omp_proc_bind_true, omp_proc_bind_master, omp_proc_bind_close, + omp_proc_bind_spread): New params. + (omp_get_cancellation, omp_get_proc_bind, omp_set_default_device, + omp_get_default_device, omp_get_num_devices, omp_get_num_teams, + omp_get_team_num, omp_is_initial_device): New interfaces. + (omp_get_dynamic, omp_get_nested, omp_in_parallel, + omp_get_max_threads, omp_get_num_procs, omp_get_num_threads, + omp_get_thread_num, omp_get_thread_limit, omp_set_max_active_levels, + omp_get_max_active_levels, omp_get_level, omp_get_ancestor_thread_num, + omp_get_team_size, omp_get_active_level, omp_in_final): Remove + useless use omp_lib_kinds. + * omp.h.in (omp_proc_bind_t): New typedef. 
+ (omp_get_cancellation, omp_get_proc_bind, omp_set_default_device, + omp_get_default_device, omp_get_num_devices, omp_get_num_teams, + omp_get_team_num, omp_is_initial_device): New prototypes. + * loop.c (gomp_parallel_loop_start): Add flags argument, pass it + through to gomp_team_start. + (GOMP_parallel_loop_static_start, GOMP_parallel_loop_dynamic_start, + GOMP_parallel_loop_guided_start, GOMP_parallel_loop_runtime_start): + Adjust gomp_parallel_loop_start callers. + (GOMP_parallel_loop_static, GOMP_parallel_loop_dynamic, + GOMP_parallel_loop_guided, GOMP_parallel_loop_runtime, + GOMP_loop_end_cancel): New functions. + (GOMP_parallel_end): Add ialias_redirect. + * hashtab.h: New file. + * libgomp.texi (Environment Variables): Minor cleanup, + update section refs to OpenMP 4.0rc2. + (OMP_DISPLAY_ENV, GOMP_SPINCOUNT): Document these + environment variables. + * work.c (gomp_work_share_end, gomp_work_share_end_nowait): Set + team->work_shares_to_free to thr->ts.work_share before calling + free_work_share. + (gomp_work_share_end_cancel): New function. + * config/linux/proc.c: Include errno.h. + (gomp_get_cpuset_size, gomp_cpuset_size, gomp_cpusetp): New variables. + (gomp_cpuset_popcount): Add cpusetsize argument, use it instead of + sizeof (cpu_set_t) to determine number of iterations. Fix up check + extern decl. Use CPU_COUNT_S if available, or CPU_COUNT if + gomp_cpuset_size is sizeof (cpu_set_t). + (gomp_init_num_threads): Initialize gomp_cpuset_size, + gomp_get_cpuset_size and gomp_cpusetp here, use gomp_cpusetp instead + of &cpuset and pass gomp_cpuset_size instead of sizeof (cpu_set_t) + to pthread_getaffinity_np. Free and clear gomp_cpusetp if it didn't + contain any logical CPUs. + (get_num_procs): Don't call pthread_getaffinity_np if gomp_cpusetp + is NULL. Use gomp_cpusetp instead of &cpuset and pass + gomp_get_cpuset_size instead of sizeof (cpu_set_t) to + pthread_getaffinity_np. Check gomp_places_list instead of + gomp_cpu_affinity. 
Adjust gomp_cpuset_popcount caller. + * config/linux/bar.c (gomp_barrier_wait_end, + gomp_barrier_wait_last): Use BAR_* defines. + (gomp_team_barrier_wait_end): Likewise. Clear BAR_CANCELLED + from state where needed. Set work_share_cancelled to 0 on last + thread. + (gomp_team_barrier_wait_final, gomp_team_barrier_wait_cancel_end, + gomp_team_barrier_wait_cancel, gomp_team_barrier_cancel): New + functions. + * config/linux/proc.h (gomp_cpuset_popcount): Add attribute_hidden. + Add cpusetsize argument. + (gomp_cpuset_size, gomp_cpusetp): Declare. + * config/linux/affinity.c: Include errno.h, stdio.h and string.h. + (affinity_counter): Remove. + (CPU_ISSET_S, CPU_ZERO_S, CPU_SET_S, CPU_CLR_S): Define + if CPU_ALLOC_SIZE isn't defined. + (gomp_init_affinity): Rewritten, if gomp_places_list is NULL, try + to silently create OMP_PLACES=threads, if it is non-NULL afterwards, + bind current thread to the first place. + (gomp_init_thread_affinity): Rewritten. Add place argument, just + pthread_setaffinity_np to gomp_places_list[place]. + (gomp_affinity_alloc, gomp_affinity_init_place, gomp_affinity_add_cpus, + gomp_affinity_remove_cpu, gomp_affinity_copy_place, + gomp_affinity_same_place, gomp_affinity_finalize_place_list, + gomp_affinity_init_level, gomp_affinity_print_place): New functions. + * config/linux/bar.h (BAR_TASK_PENDING, BAR_WAS_LAST, + BAR_WAITING_FOR_TASK, BAR_INCR, BAR_CANCELLED): Define. + (gomp_barrier_t): Add awaited_final field. + (gomp_barrier_init): Initialize awaited_final field. + (gomp_team_barrier_wait_final, gomp_team_barrier_wait_cancel, + gomp_team_barrier_wait_cancel_end, gomp_team_barrier_cancel): New + prototypes. + (gomp_barrier_wait_start): Preserve BAR_CANCELLED bit. Use BAR_* + defines. + (gomp_barrier_wait_cancel_start, gomp_team_barrier_wait_final_start, + gomp_team_barrier_cancelled): New inline functions. 
+ (gomp_barrier_last_thread, + gomp_team_barrier_set_task_pending, + gomp_team_barrier_clear_task_pending, + gomp_team_barrier_set_waiting_for_tasks, + gomp_team_barrier_waiting_for_tasks, + gomp_team_barrier_done): Use BAR_* defines. + * config/posix/bar.c (gomp_barrier_init): Clear cancellable field. + (gomp_barrier_wait_end): Use BAR_* defines. + (gomp_team_barrier_wait_end): Clear BAR_CANCELLED from state. + Set work_share_cancelled to 0 on last thread, use __atomic_load_n. + Use BAR_* defines. + (gomp_team_barrier_wait_cancel_end, gomp_team_barrier_wait_cancel, + gomp_team_barrier_cancel): New functions. + * config/posix/affinity.c (gomp_init_thread_affinity): Add place + argument. + (gomp_affinity_alloc, gomp_affinity_init_place, gomp_affinity_add_cpus, + gomp_affinity_remove_cpu, gomp_affinity_copy_place, + gomp_affinity_same_place, gomp_affinity_finalize_place_list, + gomp_affinity_init_level, gomp_affinity_print_place): New stubs. + * config/posix/bar.h (BAR_TASK_PENDING, BAR_WAS_LAST, + BAR_WAITING_FOR_TASK, BAR_INCR, BAR_CANCELLED): Define. + (gomp_barrier_t): Add cancellable field. + (gomp_team_barrier_wait_cancel, gomp_team_barrier_wait_cancel_end, + gomp_team_barrier_cancel): New prototypes. + (gomp_barrier_wait_start): Preserve BAR_CANCELLED bit. + (gomp_barrier_wait_cancel_start, gomp_team_barrier_wait_final, + gomp_team_barrier_cancelled): New inline functions. + (gomp_barrier_wait_start, gomp_barrier_last_thread, + gomp_team_barrier_set_task_pending, + gomp_team_barrier_clear_task_pending, + gomp_team_barrier_set_waiting_for_tasks, + gomp_team_barrier_waiting_for_tasks, + gomp_team_barrier_done): Use BAR_* defines. + * barrier.c (GOMP_barrier_cancel): New function. + * omp_lib.h.in (omp_proc_bind_kind, omp_proc_bind_false, + omp_proc_bind_true, omp_proc_bind_master, omp_proc_bind_close, + omp_proc_bind_spread): New params. 
+ (omp_get_cancellation, omp_get_proc_bind, omp_set_default_device, + omp_get_default_device, omp_get_num_devices, omp_get_num_teams, + omp_get_team_num, omp_is_initial_device): New externals. + * parallel.c (GOMP_parallel, GOMP_cancel, GOMP_cancellation_point): + New functions. + (gomp_resolve_num_threads): Adjust for thread_limit now being in + icv->thread_limit_var. Use UINT_MAX instead of ULONG_MAX as + infinity. If not nested, just return minimum of max_num_threads + and icv->thread_limit_var and if thr->thread_pool, set threads_busy + to the returned value. Otherwise, don't update atomically + gomp_remaining_threads_count, but instead thr->thread_pool->threads_busy. + (GOMP_parallel_end): Adjust for thread_limit now being in + icv->thread_limit_var. Use UINT_MAX instead of ULONG_MAX as + infinity. Adjust threads_busy in the pool rather than + gomp_remaining_threads_count. Remember team->nthreads and call + gomp_team_end before adjusting threads_busy, if not nested + afterwards, just set it to 1 non-atomically. Add ialias. + (GOMP_parallel_start): Adjust gomp_team_start caller. + * testsuite/libgomp.c/atomic-14.c: Add parens to make it valid. + * testsuite/libgomp.c/affinity-1.c: New test. + * testsuite/libgomp.c/atomic-15.c: New test. + * testsuite/libgomp.c/atomic-16.c: New test. + * testsuite/libgomp.c/atomic-17.c: New test. + * testsuite/libgomp.c/cancel-for-1.c: New test. + * testsuite/libgomp.c/cancel-for-2.c: New test. + * testsuite/libgomp.c/cancel-parallel-1.c: New test. + * testsuite/libgomp.c/cancel-parallel-2.c: New test. + * testsuite/libgomp.c/cancel-parallel-3.c: New test. + * testsuite/libgomp.c/cancel-sections-1.c: New test. + * testsuite/libgomp.c/cancel-taskgroup-1.c: New test. + * testsuite/libgomp.c/cancel-taskgroup-2.c: New test. + * testsuite/libgomp.c/depend-1.c: New test. + * testsuite/libgomp.c/depend-2.c: New test. + * testsuite/libgomp.c/depend-3.c: New test. + * testsuite/libgomp.c/depend-4.c: New test. 
+ * testsuite/libgomp.c/for-1.c: New test.
+ * testsuite/libgomp.c/for-1.h: New file.
+ * testsuite/libgomp.c/for-2.c: New test.
+ * testsuite/libgomp.c/for-2.h: New file.
+ * testsuite/libgomp.c/for-3.c: New test.
+ * testsuite/libgomp.c/pr58392.c: New test.
+ * testsuite/libgomp.c/simd-1.c: New test.
+ * testsuite/libgomp.c/simd-2.c: New test.
+ * testsuite/libgomp.c/simd-3.c: New test.
+ * testsuite/libgomp.c/simd-4.c: New test.
+ * testsuite/libgomp.c/simd-5.c: New test.
+ * testsuite/libgomp.c/simd-6.c: New test.
+ * testsuite/libgomp.c/target-1.c: New test.
+ * testsuite/libgomp.c/target-2.c: New test.
+ * testsuite/libgomp.c/target-3.c: New test.
+ * testsuite/libgomp.c/target-4.c: New test.
+ * testsuite/libgomp.c/target-5.c: New test.
+ * testsuite/libgomp.c/target-6.c: New test.
+ * testsuite/libgomp.c/target-7.c: New test.
+ * testsuite/libgomp.c/taskgroup-1.c: New test.
+ * testsuite/libgomp.c/thread-limit-1.c: New test.
+ * testsuite/libgomp.c/thread-limit-2.c: New test.
+ * testsuite/libgomp.c/thread-limit-3.c: New test.
+ * testsuite/libgomp.c/udr-1.c: New test.
+ * testsuite/libgomp.c/udr-2.c: New test.
+ * testsuite/libgomp.c/udr-3.c: New test.
+ * testsuite/libgomp.c++/affinity-1.C: New test.
+ * testsuite/libgomp.c++/atomic-10.C: New test.
+ * testsuite/libgomp.c++/atomic-11.C: New test.
+ * testsuite/libgomp.c++/atomic-12.C: New test.
+ * testsuite/libgomp.c++/atomic-13.C: New test.
+ * testsuite/libgomp.c++/atomic-14.C: New test.
+ * testsuite/libgomp.c++/atomic-15.C: New test.
+ * testsuite/libgomp.c++/cancel-for-1.C: New test.
+ * testsuite/libgomp.c++/cancel-for-2.C: New test.
+ * testsuite/libgomp.c++/cancel-parallel-1.C: New test.
+ * testsuite/libgomp.c++/cancel-parallel-2.C: New test.
+ * testsuite/libgomp.c++/cancel-parallel-3.C: New test.
+ * testsuite/libgomp.c++/cancel-sections-1.C: New test.
+ * testsuite/libgomp.c++/cancel-taskgroup-1.C: New test.
+ * testsuite/libgomp.c++/cancel-taskgroup-2.C: New test.
+ * testsuite/libgomp.c++/cancel-taskgroup-3.C: New test.
+ * testsuite/libgomp.c++/cancel-test.h: New file.
+ * testsuite/libgomp.c++/for-9.C: New test.
+ * testsuite/libgomp.c++/for-10.C: New test.
+ * testsuite/libgomp.c++/for-11.C: New test.
+ * testsuite/libgomp.c++/simd-1.C: New test.
+ * testsuite/libgomp.c++/simd-2.C: New test.
+ * testsuite/libgomp.c++/simd-3.C: New test.
+ * testsuite/libgomp.c++/simd-4.C: New test.
+ * testsuite/libgomp.c++/simd-5.C: New test.
+ * testsuite/libgomp.c++/simd-6.C: New test.
+ * testsuite/libgomp.c++/simd-7.C: New test.
+ * testsuite/libgomp.c++/simd-8.C: New test.
+ * testsuite/libgomp.c++/target-1.C: New test.
+ * testsuite/libgomp.c++/target-2.C: New test.
+ * testsuite/libgomp.c++/target-2-aux.cc: New file.
+ * testsuite/libgomp.c++/target-3.C: New test.
+ * testsuite/libgomp.c++/taskgroup-1.C: New test.
+ * testsuite/libgomp.c++/udr-1.C: New test.
+ * testsuite/libgomp.c++/udr-2.C: New test.
+ * testsuite/libgomp.c++/udr-3.C: New test.
+ * testsuite/libgomp.c++/udr-4.C: New test.
+ * testsuite/libgomp.c++/udr-5.C: New test.
+ * testsuite/libgomp.c++/udr-6.C: New test.
+ * testsuite/libgomp.c++/udr-7.C: New test.
+ * testsuite/libgomp.c++/udr-8.C: New test.
+ * testsuite/libgomp.c++/udr-9.C: New test.
+ 2013-09-20 Jakub Jelinek <jakub@redhat.com> PR testsuite/57605 diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am index 2bc4986..e546dbe 100644 --- a/libgomp/Makefile.am +++ b/libgomp/Makefile.am @@ -60,7 +60,7 @@ libgomp_la_LINK = $(LINK) $(libgomp_la_LDFLAGS) libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \ iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c single.c \ task.c team.c work.c lock.c mutex.c proc.c sem.c bar.c ptrlock.c \ - time.c fortran.c affinity.c + time.c fortran.c affinity.c target.c nodist_noinst_HEADERS = libgomp_f.h nodist_libsubinclude_HEADERS = omp.h diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in index ecc010e..06048e7c 100644 --- a/libgomp/Makefile.in +++ b/libgomp/Makefile.in @@ -96,7 +96,7 @@ am_libgomp_la_OBJECTS = alloc.lo barrier.lo critical.lo env.lo \ error.lo iter.lo iter_ull.lo loop.lo loop_ull.lo ordered.lo \ parallel.lo sections.lo single.lo task.lo team.lo work.lo \ lock.lo mutex.lo proc.lo sem.lo bar.lo ptrlock.lo time.lo \ - fortran.lo affinity.lo + fortran.lo affinity.lo target.lo libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS) DEFAULT_INCLUDES = -I.@am__isrc@ depcomp = $(SHELL) $(top_srcdir)/../depcomp @@ -317,7 +317,7 @@ libgomp_la_LINK = $(LINK) $(libgomp_la_LDFLAGS) libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \ iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c single.c \ task.c team.c work.c lock.c mutex.c proc.c sem.c bar.c ptrlock.c \ - time.c fortran.c affinity.c + time.c fortran.c affinity.c target.c nodist_noinst_HEADERS = libgomp_f.h nodist_libsubinclude_HEADERS = omp.h @@ -474,6 +474,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sections.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sem.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/single.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target.Plo@am__quote@ @AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/task.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/team.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/time.Plo@am__quote@ diff --git a/libgomp/barrier.c b/libgomp/barrier.c index dfd0bd4..2a9fe8f 100644 --- a/libgomp/barrier.c +++ b/libgomp/barrier.c @@ -39,3 +39,15 @@ GOMP_barrier (void) gomp_team_barrier_wait (&team->barrier); } + +bool +GOMP_barrier_cancel (void) +{ + struct gomp_thread *thr = gomp_thread (); + struct gomp_team *team = thr->ts.team; + + /* The compiler transforms to barrier_cancel when it sees that the + barrier is within a construct that can cancel. Thus we should + never have an orphaned cancellable barrier. */ + return gomp_team_barrier_wait_cancel (&team->barrier); +} diff --git a/libgomp/config/linux/affinity.c b/libgomp/config/linux/affinity.c index dc6c7e5..789cdce 100644 --- a/libgomp/config/linux/affinity.c +++ b/libgomp/config/linux/affinity.c @@ -29,90 +29,327 @@ #endif #include "libgomp.h" #include "proc.h" +#include <errno.h> #include <stdlib.h> +#include <stdio.h> +#include <string.h> #include <unistd.h> #ifdef HAVE_PTHREAD_AFFINITY_NP -static unsigned int affinity_counter; +#ifndef CPU_ALLOC_SIZE +#define CPU_ISSET_S(idx, size, set) CPU_ISSET(idx, set) +#define CPU_ZERO_S(size, set) CPU_ZERO(set) +#define CPU_SET_S(idx, size, set) CPU_SET(idx, set) +#define CPU_CLR_S(idx, size, set) CPU_CLR(idx, set) +#endif void gomp_init_affinity (void) { - cpu_set_t cpuset, cpusetnew; - size_t idx, widx; - unsigned long cpus = 0; + if (gomp_places_list == NULL) + { + if (!gomp_affinity_init_level (1, ULONG_MAX, true)) + return; + } + + struct gomp_thread *thr = gomp_thread (); + pthread_setaffinity_np (pthread_self (), gomp_cpuset_size, + (cpu_set_t *) gomp_places_list[0]); + thr->place = 1; + thr->ts.place_partition_off = 0; + thr->ts.place_partition_len = gomp_places_list_len; +} + +void +gomp_init_thread_affinity (pthread_attr_t *attr, unsigned int place) +{ + 
pthread_attr_setaffinity_np (attr, gomp_cpuset_size, + (cpu_set_t *) gomp_places_list[place]); +} + +void ** +gomp_affinity_alloc (unsigned long count, bool quiet) +{ + unsigned long i; + void **ret; + char *p; + + if (gomp_cpusetp == NULL) + { + if (!quiet) + gomp_error ("Could not get CPU affinity set"); + return NULL; + } - if (pthread_getaffinity_np (pthread_self (), sizeof (cpuset), &cpuset)) + ret = malloc (count * sizeof (void *) + count * gomp_cpuset_size); + if (ret == NULL) { - gomp_error ("could not get CPU affinity set"); - free (gomp_cpu_affinity); - gomp_cpu_affinity = NULL; - gomp_cpu_affinity_len = 0; - return; + if (!quiet) + gomp_error ("Out of memory trying to allocate places list"); + return NULL; } - CPU_ZERO (&cpusetnew); - if (gomp_cpu_affinity_len == 0) + p = (char *) (ret + count); + for (i = 0; i < count; i++, p += gomp_cpuset_size) + ret[i] = p; + return ret; +} + +void +gomp_affinity_init_place (void *p) +{ + cpu_set_t *cpusetp = (cpu_set_t *) p; + CPU_ZERO_S (gomp_cpuset_size, cpusetp); +} + +bool +gomp_affinity_add_cpus (void *p, unsigned long num, + unsigned long len, long stride, bool quiet) +{ + cpu_set_t *cpusetp = (cpu_set_t *) p; + unsigned long max = 8 * gomp_cpuset_size; + for (;;) { - unsigned long count = gomp_cpuset_popcount (&cpuset); - if (count >= 65536) - count = 65536; - gomp_cpu_affinity = malloc (count * sizeof (unsigned short)); - if (gomp_cpu_affinity == NULL) + if (num >= max) + { + if (!quiet) + gomp_error ("Logical CPU number %lu out of range", num); + return false; + } + CPU_SET_S (num, gomp_cpuset_size, cpusetp); + if (--len == 0) + return true; + if ((stride < 0 && num + stride > num) + || (stride > 0 && num + stride < num)) { - gomp_error ("not enough memory to store CPU affinity list"); - return; + if (!quiet) + gomp_error ("Logical CPU number %lu+%ld out of range", + num, stride); + return false; } - for (widx = idx = 0; widx < count && idx < 65536; idx++) - if (CPU_ISSET (idx, &cpuset)) + num += stride; + 
} +} + +bool +gomp_affinity_remove_cpu (void *p, unsigned long num) +{ + cpu_set_t *cpusetp = (cpu_set_t *) p; + if (num >= 8 * gomp_cpuset_size) + { + gomp_error ("Logical CPU number %lu out of range", num); + return false; + } + if (!CPU_ISSET_S (num, gomp_cpuset_size, cpusetp)) + { + gomp_error ("Logical CPU %lu to be removed is not in the set", num); + return false; + } + CPU_CLR_S (num, gomp_cpuset_size, cpusetp); + return true; +} + +bool +gomp_affinity_copy_place (void *p, void *q, long stride) +{ + unsigned long i, max = 8 * gomp_cpuset_size; + cpu_set_t *destp = (cpu_set_t *) p; + cpu_set_t *srcp = (cpu_set_t *) q; + + CPU_ZERO_S (gomp_cpuset_size, destp); + for (i = 0; i < max; i++) + if (CPU_ISSET_S (i, gomp_cpuset_size, srcp)) + { + if ((stride < 0 && i + stride > i) + || (stride > 0 && (i + stride < i || i + stride >= max))) + { + gomp_error ("Logical CPU number %lu+%ld out of range", i, stride); + return false; + } + CPU_SET_S (i + stride, gomp_cpuset_size, destp); + } + return true; +} + +bool +gomp_affinity_same_place (void *p, void *q) +{ +#ifdef CPU_EQUAL_S + return CPU_EQUAL_S (gomp_cpuset_size, (cpu_set_t *) p, (cpu_set_t *) q); +#else + return memcmp (p, q, gomp_cpuset_size) == 0; +#endif +} + +bool +gomp_affinity_finalize_place_list (bool quiet) +{ + unsigned long i, j; + + for (i = 0, j = 0; i < gomp_places_list_len; i++) + { + cpu_set_t *cpusetp = (cpu_set_t *) gomp_places_list[i]; + bool nonempty = false; +#ifdef CPU_AND_S + CPU_AND_S (gomp_cpuset_size, cpusetp, cpusetp, gomp_cpusetp); + nonempty = gomp_cpuset_popcount (gomp_cpuset_size, cpusetp) != 0; +#else + unsigned long k, max = gomp_cpuset_size / sizeof (cpusetp->__bits[0]); + for (k = 0; k < max; k++) + if ((cpusetp->__bits[k] &= gomp_cpusetp->__bits[k]) != 0) + nonempty = true; +#endif + if (nonempty) + gomp_places_list[j++] = gomp_places_list[i]; + } + + if (j == 0) + { + if (!quiet) + gomp_error ("None of the places contain usable logical CPUs"); + return false; + } + else if (j < 
gomp_places_list_len) + { + if (!quiet) + gomp_error ("Number of places reduced from %lu to %lu because some " + "places didn't contain any usable logical CPUs", + gomp_places_list_len, j); + gomp_places_list_len = j; + } + return true; +} + +bool +gomp_affinity_init_level (int level, unsigned long count, bool quiet) +{ + unsigned long i, max = 8 * gomp_cpuset_size; + + if (gomp_cpusetp) + { + unsigned long maxcount + = gomp_cpuset_popcount (gomp_cpuset_size, gomp_cpusetp); + if (count > maxcount) + count = maxcount; + } + gomp_places_list = gomp_affinity_alloc (count, quiet); + gomp_places_list_len = 0; + if (gomp_places_list == NULL) + return false; + /* SMT (threads). */ + if (level == 1) + { + for (i = 0; i < max && gomp_places_list_len < count; i++) + if (CPU_ISSET_S (i, gomp_cpuset_size, gomp_cpusetp)) { - cpus++; - gomp_cpu_affinity[widx++] = idx; + gomp_affinity_init_place (gomp_places_list[gomp_places_list_len]); + gomp_affinity_add_cpus (gomp_places_list[gomp_places_list_len], + i, 1, 0, true); + ++gomp_places_list_len; } + return true; } else - for (widx = idx = 0; idx < gomp_cpu_affinity_len; idx++) - if (gomp_cpu_affinity[idx] < CPU_SETSIZE - && CPU_ISSET (gomp_cpu_affinity[idx], &cpuset)) + { + char name[sizeof ("/sys/devices/system/cpu/cpu/topology/" + "thread_siblings_list") + 3 * sizeof (unsigned long)]; + size_t prefix_len = sizeof ("/sys/devices/system/cpu/cpu") - 1; + cpu_set_t *copy = gomp_alloca (gomp_cpuset_size); + FILE *f; + char *line = NULL; + size_t linelen = 0; + + memcpy (name, "/sys/devices/system/cpu/cpu", prefix_len); + memcpy (copy, gomp_cpusetp, gomp_cpuset_size); + for (i = 0; i < max && gomp_places_list_len < count; i++) + if (CPU_ISSET_S (i, gomp_cpuset_size, copy)) + { + sprintf (name + prefix_len, "%lu/topology/%s_siblings_list", + i, level == 2 ? 
"thread" : "core"); + f = fopen (name, "r"); + if (f != NULL) + { + if (getline (&line, &linelen, f) > 0) + { + char *p = line; + bool seen_i = false; + void *pl = gomp_places_list[gomp_places_list_len]; + gomp_affinity_init_place (pl); + while (*p && *p != '\n') + { + unsigned long first, last; + errno = 0; + first = strtoul (p, &p, 10); + if (errno) + break; + last = first; + if (*p == '-') + { + errno = 0; + last = strtoul (p + 1, &p, 10); + if (errno || last < first) + break; + } + for (; first <= last; first++) + if (CPU_ISSET_S (first, gomp_cpuset_size, copy) + && gomp_affinity_add_cpus (pl, first, 1, 0, + true)) + { + CPU_CLR_S (first, gomp_cpuset_size, copy); + if (first == i) + seen_i = true; + } + if (*p == ',') + ++p; + } + if (seen_i) + gomp_places_list_len++; + } + fclose (f); + } + } + if (gomp_places_list_len == 0) /* No place was completed from the sysfs sibling lists. */ { - if (! CPU_ISSET (gomp_cpu_affinity[idx], &cpusetnew)) - { - cpus++; - CPU_SET (gomp_cpu_affinity[idx], &cpusetnew); - } - gomp_cpu_affinity[widx++] = gomp_cpu_affinity[idx]; + if (!quiet) + gomp_error ("Error reading %s topology", + level == 2 ? 
"core" : "socket"); + free (gomp_places_list); + gomp_places_list = NULL; + return false; } - - if (widx == 0) - { - gomp_error ("no CPUs left for affinity setting"); - free (gomp_cpu_affinity); - gomp_cpu_affinity = NULL; - gomp_cpu_affinity_len = 0; - return; + return true; } - - gomp_cpu_affinity_len = widx; - if (cpus < gomp_available_cpus) - gomp_available_cpus = cpus; - CPU_ZERO (&cpuset); - CPU_SET (gomp_cpu_affinity[0], &cpuset); - pthread_setaffinity_np (pthread_self (), sizeof (cpuset), &cpuset); - affinity_counter = 1; + return false; } void -gomp_init_thread_affinity (pthread_attr_t *attr) +gomp_affinity_print_place (void *p) { - unsigned int cpu; - cpu_set_t cpuset; - - cpu = __atomic_fetch_add (&affinity_counter, 1, MEMMODEL_RELAXED); - cpu %= gomp_cpu_affinity_len; - CPU_ZERO (&cpuset); - CPU_SET (gomp_cpu_affinity[cpu], &cpuset); - pthread_attr_setaffinity_np (attr, sizeof (cpu_set_t), &cpuset); + unsigned long i, max = 8 * gomp_cpuset_size, len; + cpu_set_t *cpusetp = (cpu_set_t *) p; + bool notfirst = false; + + for (i = 0, len = 0; i < max; i++) + if (CPU_ISSET_S (i, gomp_cpuset_size, cpusetp)) + { + if (len == 0) + { + if (notfirst) + fputc (',', stderr); + notfirst = true; + fprintf (stderr, "%lu", i); + } + ++len; + } + else + { + if (len > 1) + fprintf (stderr, ":%lu", len); + len = 0; + } + if (len > 1) + fprintf (stderr, ":%lu", len); } #else diff --git a/libgomp/config/linux/bar.c b/libgomp/config/linux/bar.c index 35baa88..6b591e5 100644 --- a/libgomp/config/linux/bar.c +++ b/libgomp/config/linux/bar.c @@ -33,11 +33,11 @@ void gomp_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state) { - if (__builtin_expect ((state & 1) != 0, 0)) + if (__builtin_expect (state & BAR_WAS_LAST, 0)) { /* Next time we'll be awaiting TOTAL threads again. 
*/ bar->awaited = bar->total; - __atomic_store_n (&bar->generation, bar->generation + 4, + __atomic_store_n (&bar->generation, bar->generation + BAR_INCR, MEMMODEL_RELEASE); futex_wake ((int *) &bar->generation, INT_MAX); } @@ -66,7 +66,7 @@ void gomp_barrier_wait_last (gomp_barrier_t *bar) { gomp_barrier_state_t state = gomp_barrier_wait_start (bar); - if (state & 1) + if (state & BAR_WAS_LAST) gomp_barrier_wait_end (bar, state); } @@ -81,40 +81,43 @@ gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state) { unsigned int generation, gen; - if (__builtin_expect ((state & 1) != 0, 0)) + if (__builtin_expect (state & BAR_WAS_LAST, 0)) { /* Next time we'll be awaiting TOTAL threads again. */ struct gomp_thread *thr = gomp_thread (); struct gomp_team *team = thr->ts.team; bar->awaited = bar->total; + team->work_share_cancelled = 0; if (__builtin_expect (team->task_count, 0)) { gomp_barrier_handle_tasks (state); - state &= ~1; + state &= ~BAR_WAS_LAST; } else { - __atomic_store_n (&bar->generation, state + 3, MEMMODEL_RELEASE); + state &= ~BAR_CANCELLED; + state += BAR_INCR - BAR_WAS_LAST; + __atomic_store_n (&bar->generation, state, MEMMODEL_RELEASE); futex_wake ((int *) &bar->generation, INT_MAX); return; } } generation = state; + state &= ~BAR_CANCELLED; do { do_wait ((int *) &bar->generation, generation); gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE); - if (__builtin_expect (gen & 1, 0)) + if (__builtin_expect (gen & BAR_TASK_PENDING, 0)) { gomp_barrier_handle_tasks (state); gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE); } - if ((gen & 2) != 0) - generation |= 2; + generation |= gen & BAR_WAITING_FOR_TASK; } - while (gen != state + 4); + while (gen != state + BAR_INCR); } void @@ -122,3 +125,86 @@ gomp_team_barrier_wait (gomp_barrier_t *bar) { gomp_team_barrier_wait_end (bar, gomp_barrier_wait_start (bar)); } + +void +gomp_team_barrier_wait_final (gomp_barrier_t *bar) +{ + gomp_barrier_state_t state = 
gomp_barrier_wait_final_start (bar); + if (__builtin_expect (state & BAR_WAS_LAST, 0)) + bar->awaited_final = bar->total; + gomp_team_barrier_wait_end (bar, state); +} + +bool +gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar, + gomp_barrier_state_t state) +{ + unsigned int generation, gen; + + if (__builtin_expect (state & BAR_WAS_LAST, 0)) + { + /* Next time we'll be awaiting TOTAL threads again. */ + /* BAR_CANCELLED should never be set in state here, because + cancellation means that at least one of the threads has been + cancelled, thus on a cancellable barrier we should never see + all threads to arrive. */ + struct gomp_thread *thr = gomp_thread (); + struct gomp_team *team = thr->ts.team; + + bar->awaited = bar->total; + team->work_share_cancelled = 0; + if (__builtin_expect (team->task_count, 0)) + { + gomp_barrier_handle_tasks (state); + state &= ~BAR_WAS_LAST; + } + else + { + state += BAR_INCR - BAR_WAS_LAST; + __atomic_store_n (&bar->generation, state, MEMMODEL_RELEASE); + futex_wake ((int *) &bar->generation, INT_MAX); + return false; + } + } + + if (__builtin_expect (state & BAR_CANCELLED, 0)) + return true; + + generation = state; + do + { + do_wait ((int *) &bar->generation, generation); + gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE); + if (__builtin_expect (gen & BAR_CANCELLED, 0)) + return true; + if (__builtin_expect (gen & BAR_TASK_PENDING, 0)) + { + gomp_barrier_handle_tasks (state); + gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE); + } + generation |= gen & BAR_WAITING_FOR_TASK; + } + while (gen != state + BAR_INCR); + + return false; +} + +bool +gomp_team_barrier_wait_cancel (gomp_barrier_t *bar) +{ + return gomp_team_barrier_wait_cancel_end (bar, gomp_barrier_wait_start (bar)); +} + +void +gomp_team_barrier_cancel (struct gomp_team *team) +{ + gomp_mutex_lock (&team->task_lock); + if (team->barrier.generation & BAR_CANCELLED) + { + gomp_mutex_unlock (&team->task_lock); + return; + } + team->barrier.generation 
|= BAR_CANCELLED; + gomp_mutex_unlock (&team->task_lock); + futex_wake ((int *) &team->barrier.generation, INT_MAX); +} diff --git a/libgomp/config/linux/bar.h b/libgomp/config/linux/bar.h index 69b9706..914c867 100644 --- a/libgomp/config/linux/bar.h +++ b/libgomp/config/linux/bar.h @@ -38,13 +38,25 @@ typedef struct unsigned total __attribute__((aligned (64))); unsigned generation; unsigned awaited __attribute__((aligned (64))); + unsigned awaited_final; } gomp_barrier_t; + typedef unsigned int gomp_barrier_state_t; +/* The generation field contains a counter in the high bits, with a few + low bits dedicated to flags. Note that TASK_PENDING and WAS_LAST can + share space because WAS_LAST is never stored back to generation. */ +#define BAR_TASK_PENDING 1 +#define BAR_WAS_LAST 1 +#define BAR_WAITING_FOR_TASK 2 +#define BAR_CANCELLED 4 +#define BAR_INCR 8 + static inline void gomp_barrier_init (gomp_barrier_t *bar, unsigned count) { bar->total = count; bar->awaited = count; + bar->awaited_final = count; bar->generation = 0; } @@ -62,27 +74,55 @@ extern void gomp_barrier_wait (gomp_barrier_t *); extern void gomp_barrier_wait_last (gomp_barrier_t *); extern void gomp_barrier_wait_end (gomp_barrier_t *, gomp_barrier_state_t); extern void gomp_team_barrier_wait (gomp_barrier_t *); +extern void gomp_team_barrier_wait_final (gomp_barrier_t *); extern void gomp_team_barrier_wait_end (gomp_barrier_t *, gomp_barrier_state_t); +extern bool gomp_team_barrier_wait_cancel (gomp_barrier_t *); +extern bool gomp_team_barrier_wait_cancel_end (gomp_barrier_t *, + gomp_barrier_state_t); extern void gomp_team_barrier_wake (gomp_barrier_t *, int); +struct gomp_team; +extern void gomp_team_barrier_cancel (struct gomp_team *); static inline gomp_barrier_state_t gomp_barrier_wait_start (gomp_barrier_t *bar) { - unsigned int ret = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE) & ~3; + unsigned int ret = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE); + ret &= -BAR_INCR | 
BAR_CANCELLED; /* A memory barrier is needed before exiting from the various forms of gomp_barrier_wait, to satisfy OpenMP API version 3.1 section 2.8.6 flush Construct, which says there is an implicit flush during a barrier region. This is a convenient place to add the barrier, so we use MEMMODEL_ACQ_REL here rather than MEMMODEL_ACQUIRE. */ - ret += __atomic_add_fetch (&bar->awaited, -1, MEMMODEL_ACQ_REL) == 0; + if (__atomic_add_fetch (&bar->awaited, -1, MEMMODEL_ACQ_REL) == 0) + ret |= BAR_WAS_LAST; + return ret; +} + +static inline gomp_barrier_state_t +gomp_barrier_wait_cancel_start (gomp_barrier_t *bar) +{ + return gomp_barrier_wait_start (bar); +} + +/* This is like gomp_barrier_wait_start, except it decrements + bar->awaited_final rather than bar->awaited and should be used + for the gomp_team_end barrier only. */ +static inline gomp_barrier_state_t +gomp_barrier_wait_final_start (gomp_barrier_t *bar) +{ + unsigned int ret = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE); + ret &= -BAR_INCR | BAR_CANCELLED; + /* See above gomp_barrier_wait_start comment. 
*/ + if (__atomic_add_fetch (&bar->awaited_final, -1, MEMMODEL_ACQ_REL) == 0) + ret |= BAR_WAS_LAST; return ret; } static inline bool gomp_barrier_last_thread (gomp_barrier_state_t state) { - return state & 1; + return state & BAR_WAS_LAST; } /* All the inlines below must be called with team->task_lock @@ -91,31 +131,37 @@ gomp_barrier_last_thread (gomp_barrier_state_t state) static inline void gomp_team_barrier_set_task_pending (gomp_barrier_t *bar) { - bar->generation |= 1; + bar->generation |= BAR_TASK_PENDING; } static inline void gomp_team_barrier_clear_task_pending (gomp_barrier_t *bar) { - bar->generation &= ~1; + bar->generation &= ~BAR_TASK_PENDING; } static inline void gomp_team_barrier_set_waiting_for_tasks (gomp_barrier_t *bar) { - bar->generation |= 2; + bar->generation |= BAR_WAITING_FOR_TASK; } static inline bool gomp_team_barrier_waiting_for_tasks (gomp_barrier_t *bar) { - return (bar->generation & 2) != 0; + return (bar->generation & BAR_WAITING_FOR_TASK) != 0; +} + +static inline bool +gomp_team_barrier_cancelled (gomp_barrier_t *bar) +{ + return __builtin_expect ((bar->generation & BAR_CANCELLED) != 0, 0); } static inline void gomp_team_barrier_done (gomp_barrier_t *bar, gomp_barrier_state_t state) { - bar->generation = (state & ~3) + 4; + bar->generation = (state & -BAR_INCR) + BAR_INCR; } #endif /* GOMP_BARRIER_H */ diff --git a/libgomp/config/linux/proc.c b/libgomp/config/linux/proc.c index cbb773e..d4ae116 100644 --- a/libgomp/config/linux/proc.c +++ b/libgomp/config/linux/proc.c @@ -30,6 +30,7 @@ #endif #include "libgomp.h" #include "proc.h" +#include <errno.h> #include <stdlib.h> #include <unistd.h> #ifdef HAVE_GETLOADAVG @@ -39,19 +40,28 @@ #endif #ifdef HAVE_PTHREAD_AFFINITY_NP +unsigned long gomp_cpuset_size; +static unsigned long gomp_get_cpuset_size; +cpu_set_t *gomp_cpusetp; + unsigned long -gomp_cpuset_popcount (cpu_set_t *cpusetp) +gomp_cpuset_popcount (unsigned long cpusetsize, cpu_set_t *cpusetp) { -#ifdef CPU_COUNT - /* glibc 2.6 
and above provide a macro for this. */ - return CPU_COUNT (cpusetp); +#ifdef CPU_COUNT_S + /* glibc 2.7 and above provide a macro for this. */ + return CPU_COUNT_S (cpusetsize, cpusetp); #else +#ifdef CPU_COUNT + if (cpusetsize == sizeof (cpu_set_t)) + /* glibc 2.6 and above provide a macro for this. */ + return CPU_COUNT (cpusetp); +#endif size_t i; unsigned long ret = 0; - extern int check[sizeof (cpusetp->__bits[0]) == sizeof (unsigned long int)]; + extern int check[sizeof (cpusetp->__bits[0]) == sizeof (unsigned long int) + ? 1 : -1]; - (void) check; - for (i = 0; i < sizeof (*cpusetp) / sizeof (cpusetp->__bits[0]); i++) + for (i = 0; i < cpusetsize / sizeof (cpusetp->__bits[0]); i++) { unsigned long int mask = cpusetp->__bits[i]; if (mask == 0) @@ -70,16 +80,63 @@ void gomp_init_num_threads (void) { #ifdef HAVE_PTHREAD_AFFINITY_NP - cpu_set_t cpuset; +#if defined (_SC_NPROCESSORS_CONF) && defined (CPU_ALLOC_SIZE) + gomp_cpuset_size = sysconf (_SC_NPROCESSORS_CONF); + gomp_cpuset_size = CPU_ALLOC_SIZE (gomp_cpuset_size); +#else + gomp_cpuset_size = sizeof (cpu_set_t); +#endif - if (pthread_getaffinity_np (pthread_self (), sizeof (cpuset), &cpuset) == 0) + gomp_cpusetp = (cpu_set_t *) gomp_malloc (gomp_cpuset_size); + do { - /* Count only the CPUs this process can use. */ - gomp_global_icv.nthreads_var = gomp_cpuset_popcount (&cpuset); - if (gomp_global_icv.nthreads_var == 0) - gomp_global_icv.nthreads_var = 1; - return; + int ret = pthread_getaffinity_np (pthread_self (), gomp_cpuset_size, + gomp_cpusetp); + if (ret == 0) + { + unsigned long i; + /* Count only the CPUs this process can use. 
*/ + gomp_global_icv.nthreads_var + = gomp_cpuset_popcount (gomp_cpuset_size, gomp_cpusetp); + if (gomp_global_icv.nthreads_var == 0) + break; + gomp_get_cpuset_size = gomp_cpuset_size; +#ifdef CPU_ALLOC_SIZE + for (i = gomp_cpuset_size * 8; i; i--) + if (CPU_ISSET_S (i - 1, gomp_cpuset_size, gomp_cpusetp)) + break; + gomp_cpuset_size = CPU_ALLOC_SIZE (i); +#endif + return; + } + if (ret != EINVAL) + break; +#ifdef CPU_ALLOC_SIZE + if (gomp_cpuset_size < sizeof (cpu_set_t)) + gomp_cpuset_size = sizeof (cpu_set_t); + else + gomp_cpuset_size = gomp_cpuset_size * 2; + if (gomp_cpuset_size < 8 * sizeof (cpu_set_t)) + gomp_cpusetp + = (cpu_set_t *) gomp_realloc (gomp_cpusetp, gomp_cpuset_size); + else + { + /* Avoid gomp_fatal if too large memory allocation would be + requested, e.g. kernel returning EINVAL all the time. */ + void *p = realloc (gomp_cpusetp, gomp_cpuset_size); + if (p == NULL) + break; + gomp_cpusetp = (cpu_set_t *) p; + } +#else + break; +#endif } + while (1); + gomp_cpuset_size = 0; + gomp_global_icv.nthreads_var = 1; + free (gomp_cpusetp); + gomp_cpusetp = NULL; #endif #ifdef _SC_NPROCESSORS_ONLN gomp_global_icv.nthreads_var = sysconf (_SC_NPROCESSORS_ONLN); @@ -90,15 +147,14 @@ static int get_num_procs (void) { #ifdef HAVE_PTHREAD_AFFINITY_NP - cpu_set_t cpuset; - - if (gomp_cpu_affinity == NULL) + if (gomp_places_list == NULL) { /* Count only the CPUs this process can use. */ - if (pthread_getaffinity_np (pthread_self (), sizeof (cpuset), - &cpuset) == 0) + if (gomp_cpusetp + && pthread_getaffinity_np (pthread_self (), gomp_get_cpuset_size, + gomp_cpusetp) == 0) { - int ret = gomp_cpuset_popcount (&cpuset); + int ret = gomp_cpuset_popcount (gomp_get_cpuset_size, gomp_cpusetp); return ret != 0 ? 
ret : 1; } } diff --git a/libgomp/config/linux/proc.h b/libgomp/config/linux/proc.h index cba7f4a..bdc85db 100644 --- a/libgomp/config/linux/proc.h +++ b/libgomp/config/linux/proc.h @@ -28,7 +28,10 @@ #include <sched.h> #ifdef HAVE_PTHREAD_AFFINITY_NP -extern unsigned long gomp_cpuset_popcount (cpu_set_t *); +extern unsigned long gomp_cpuset_size attribute_hidden; +extern cpu_set_t *gomp_cpusetp attribute_hidden; +extern unsigned long gomp_cpuset_popcount (unsigned long, cpu_set_t *) + attribute_hidden; #endif #endif /* GOMP_PROC_H */ diff --git a/libgomp/config/posix/affinity.c b/libgomp/config/posix/affinity.c index ac3d14e..e7f97ab 100644 --- a/libgomp/config/posix/affinity.c +++ b/libgomp/config/posix/affinity.c @@ -32,7 +32,84 @@ gomp_init_affinity (void) } void -gomp_init_thread_affinity (pthread_attr_t *attr) +gomp_init_thread_affinity (pthread_attr_t *attr, unsigned int place) { (void) attr; + (void) place; +} + +void ** +gomp_affinity_alloc (unsigned long count, bool quiet) +{ + (void) count; + if (!quiet) + gomp_error ("Affinity not supported on this configuration"); + return NULL; +} + +void +gomp_affinity_init_place (void *p) +{ + (void) p; +} + +bool +gomp_affinity_add_cpus (void *p, unsigned long num, + unsigned long len, long stride, bool quiet) +{ + (void) p; + (void) num; + (void) len; + (void) stride; + (void) quiet; + return false; +} + +bool +gomp_affinity_remove_cpu (void *p, unsigned long num) +{ + (void) p; + (void) num; + return false; +} + +bool +gomp_affinity_copy_place (void *p, void *q, long stride) +{ + (void) p; + (void) q; + (void) stride; + return false; +} + +bool +gomp_affinity_same_place (void *p, void *q) +{ + (void) p; + (void) q; + return false; +} + +bool +gomp_affinity_finalize_place_list (bool quiet) +{ + (void) quiet; + return false; +} + +bool +gomp_affinity_init_level (int level, unsigned long count, bool quiet) +{ + (void) level; + (void) count; + (void) quiet; + if (!quiet) + gomp_error ("Affinity not supported on this 
configuration"); + return false; /* bool result: affinity levels are unavailable here. */ +} + +void +gomp_affinity_print_place (void *p) +{ + (void) p; } diff --git a/libgomp/config/posix/bar.c b/libgomp/config/posix/bar.c index 06a3185..bdf3978 100644 --- a/libgomp/config/posix/bar.c +++ b/libgomp/config/posix/bar.c @@ -42,6 +42,7 @@ gomp_barrier_init (gomp_barrier_t *bar, unsigned count) bar->total = count; bar->arrived = 0; bar->generation = 0; + bar->cancellable = false; } void @@ -72,7 +73,7 @@ gomp_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state) { unsigned int n; - if (state & 1) + if (state & BAR_WAS_LAST) { n = --bar->arrived; if (n > 0) @@ -113,12 +114,14 @@ gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state) { unsigned int n; - if (state & 1) + state &= ~BAR_CANCELLED; + if (state & BAR_WAS_LAST) { n = --bar->arrived; struct gomp_thread *thr = gomp_thread (); struct gomp_team *team = thr->ts.team; + team->work_share_cancelled = 0; if (team->task_count) { gomp_barrier_handle_tasks (state); @@ -128,7 +131,7 @@ gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state) return; } - bar->generation = state + 3; + bar->generation = state + BAR_INCR - BAR_WAS_LAST; if (n > 0) { do @@ -141,13 +144,18 @@ gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state) else { gomp_mutex_unlock (&bar->mutex1); + int gen; do { gomp_sem_wait (&bar->sem1); - if (bar->generation & 1) - gomp_barrier_handle_tasks (state); + gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE); + if (gen & BAR_TASK_PENDING) + { + gomp_barrier_handle_tasks (state); + gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE); + } } - while (bar->generation != state + 4); + while (gen != state + BAR_INCR); #ifdef HAVE_SYNC_BUILTINS n = __sync_add_and_fetch (&bar->arrived, -1); @@ -162,6 +170,81 @@ gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state) } } +bool +gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar, + gomp_barrier_state_t state) 
+{ + unsigned int n; + + if (state & BAR_WAS_LAST) + { + bar->cancellable = false; + n = --bar->arrived; + struct gomp_thread *thr = gomp_thread (); + struct gomp_team *team = thr->ts.team; + + team->work_share_cancelled = 0; + if (team->task_count) + { + gomp_barrier_handle_tasks (state); + if (n > 0) + gomp_sem_wait (&bar->sem2); + gomp_mutex_unlock (&bar->mutex1); + return false; + } + + bar->generation = state + BAR_INCR - BAR_WAS_LAST; + if (n > 0) + { + do + gomp_sem_post (&bar->sem1); + while (--n != 0); + gomp_sem_wait (&bar->sem2); + } + gomp_mutex_unlock (&bar->mutex1); + } + else + { + if (state & BAR_CANCELLED) + { + gomp_mutex_unlock (&bar->mutex1); + return true; + } + bar->cancellable = true; + gomp_mutex_unlock (&bar->mutex1); + int gen; + do + { + gomp_sem_wait (&bar->sem1); + gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE); + if (gen & BAR_CANCELLED) + break; + if (gen & BAR_TASK_PENDING) + { + gomp_barrier_handle_tasks (state); + gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE); + if (gen & BAR_CANCELLED) + break; + } + } + while (gen != state + BAR_INCR); + +#ifdef HAVE_SYNC_BUILTINS + n = __sync_add_and_fetch (&bar->arrived, -1); +#else + gomp_mutex_lock (&bar->mutex2); + n = --bar->arrived; + gomp_mutex_unlock (&bar->mutex2); +#endif + + if (n == 0) + gomp_sem_post (&bar->sem2); + if (gen & BAR_CANCELLED) + return true; + } + return false; +} + void gomp_team_barrier_wait (gomp_barrier_t *barrier) { @@ -176,3 +259,40 @@ gomp_team_barrier_wake (gomp_barrier_t *bar, int count) while (count-- > 0) gomp_sem_post (&bar->sem1); } + +bool +gomp_team_barrier_wait_cancel (gomp_barrier_t *bar) +{ + gomp_barrier_state_t state = gomp_barrier_wait_cancel_start (bar); + return gomp_team_barrier_wait_cancel_end (bar, state); +} + +void +gomp_team_barrier_cancel (struct gomp_team *team) +{ + if (team->barrier.generation & BAR_CANCELLED) + return; + gomp_mutex_lock (&team->barrier.mutex1); + gomp_mutex_lock (&team->task_lock); + if 
(team->barrier.generation & BAR_CANCELLED) + { + gomp_mutex_unlock (&team->task_lock); + gomp_mutex_unlock (&team->barrier.mutex1); + return; + } + team->barrier.generation |= BAR_CANCELLED; + gomp_mutex_unlock (&team->task_lock); + if (team->barrier.cancellable) + { + int n = team->barrier.arrived; + if (n > 0) + { + do + gomp_sem_post (&team->barrier.sem1); + while (--n != 0); + gomp_sem_wait (&team->barrier.sem2); + } + team->barrier.cancellable = false; + } + gomp_mutex_unlock (&team->barrier.mutex1); +} diff --git a/libgomp/config/posix/bar.h b/libgomp/config/posix/bar.h index 1a16ca8..9fcd4da 100644 --- a/libgomp/config/posix/bar.h +++ b/libgomp/config/posix/bar.h @@ -43,9 +43,20 @@ typedef struct unsigned total; unsigned arrived; unsigned generation; + bool cancellable; } gomp_barrier_t; + typedef unsigned int gomp_barrier_state_t; +/* The generation field contains a counter in the high bits, with a few + low bits dedicated to flags. Note that TASK_PENDING and WAS_LAST can + share space because WAS_LAST is never stored back to generation. 
*/ +#define BAR_TASK_PENDING 1 +#define BAR_WAS_LAST 1 +#define BAR_WAITING_FOR_TASK 2 +#define BAR_CANCELLED 4 +#define BAR_INCR 8 + extern void gomp_barrier_init (gomp_barrier_t *, unsigned); extern void gomp_barrier_reinit (gomp_barrier_t *, unsigned); extern void gomp_barrier_destroy (gomp_barrier_t *); @@ -55,22 +66,47 @@ extern void gomp_barrier_wait_end (gomp_barrier_t *, gomp_barrier_state_t); extern void gomp_team_barrier_wait (gomp_barrier_t *); extern void gomp_team_barrier_wait_end (gomp_barrier_t *, gomp_barrier_state_t); +extern bool gomp_team_barrier_wait_cancel (gomp_barrier_t *); +extern bool gomp_team_barrier_wait_cancel_end (gomp_barrier_t *, + gomp_barrier_state_t); extern void gomp_team_barrier_wake (gomp_barrier_t *, int); +struct gomp_team; +extern void gomp_team_barrier_cancel (struct gomp_team *); static inline gomp_barrier_state_t gomp_barrier_wait_start (gomp_barrier_t *bar) { unsigned int ret; gomp_mutex_lock (&bar->mutex1); - ret = bar->generation & ~3; - ret += ++bar->arrived == bar->total; + ret = bar->generation & (-BAR_INCR | BAR_CANCELLED); + if (++bar->arrived == bar->total) + ret |= BAR_WAS_LAST; + return ret; +} + +static inline gomp_barrier_state_t +gomp_barrier_wait_cancel_start (gomp_barrier_t *bar) +{ + unsigned int ret; + gomp_mutex_lock (&bar->mutex1); + ret = bar->generation & (-BAR_INCR | BAR_CANCELLED); + if (ret & BAR_CANCELLED) + return ret; + if (++bar->arrived == bar->total) + ret |= BAR_WAS_LAST; return ret; } +static inline void +gomp_team_barrier_wait_final (gomp_barrier_t *bar) +{ + gomp_team_barrier_wait (bar); +} + static inline bool gomp_barrier_last_thread (gomp_barrier_state_t state) { - return state & 1; + return state & BAR_WAS_LAST; } static inline void @@ -85,31 +121,37 @@ gomp_barrier_wait_last (gomp_barrier_t *bar) static inline void gomp_team_barrier_set_task_pending (gomp_barrier_t *bar) { - bar->generation |= 1; + bar->generation |= BAR_TASK_PENDING; } static inline void 
gomp_team_barrier_clear_task_pending (gomp_barrier_t *bar) { - bar->generation &= ~1; + bar->generation &= ~BAR_TASK_PENDING; } static inline void gomp_team_barrier_set_waiting_for_tasks (gomp_barrier_t *bar) { - bar->generation |= 2; + bar->generation |= BAR_WAITING_FOR_TASK; } static inline bool gomp_team_barrier_waiting_for_tasks (gomp_barrier_t *bar) { - return (bar->generation & 2) != 0; + return (bar->generation & BAR_WAITING_FOR_TASK) != 0; +} + +static inline bool +gomp_team_barrier_cancelled (gomp_barrier_t *bar) +{ + return __builtin_expect ((bar->generation & BAR_CANCELLED) != 0, 0); } static inline void gomp_team_barrier_done (gomp_barrier_t *bar, gomp_barrier_state_t state) { - bar->generation = (state & ~3) + 4; + bar->generation = (state & -BAR_INCR) + BAR_INCR; } #endif /* GOMP_BARRIER_H */ diff --git a/libgomp/env.c b/libgomp/env.c index 65cbba8..57997c5 100644 --- a/libgomp/env.c +++ b/libgomp/env.c @@ -29,6 +29,10 @@ #include "libgomp_f.h" #include <ctype.h> #include <stdlib.h> +#include <stdio.h> +#ifdef HAVE_INTTYPES_H +# include <inttypes.h> /* For PRIu64. 
*/ +#endif #ifdef STRING_WITH_STRINGS # include <string.h> # include <strings.h> @@ -50,23 +54,28 @@ struct gomp_task_icv gomp_global_icv = { .nthreads_var = 1, + .thread_limit_var = UINT_MAX, .run_sched_var = GFS_DYNAMIC, .run_sched_modifier = 1, + .default_device_var = 0, .dyn_var = false, - .nest_var = false + .nest_var = false, + .bind_var = omp_proc_bind_false, + .target_data = NULL }; -unsigned short *gomp_cpu_affinity; -size_t gomp_cpu_affinity_len; unsigned long gomp_max_active_levels_var = INT_MAX; -unsigned long gomp_thread_limit_var = ULONG_MAX; -unsigned long gomp_remaining_threads_count; +bool gomp_cancel_var = false; #ifndef HAVE_SYNC_BUILTINS -gomp_mutex_t gomp_remaining_threads_lock; +gomp_mutex_t gomp_managed_threads_lock; #endif unsigned long gomp_available_cpus = 1, gomp_managed_threads = 1; unsigned long long gomp_spin_count_var, gomp_throttled_spin_count_var; unsigned long *gomp_nthreads_var_list, gomp_nthreads_var_list_len; +char *gomp_bind_var_list; +unsigned long gomp_bind_var_list_len; +void **gomp_places_list; +unsigned long gomp_places_list_len; /* Parse the OMP_SCHEDULE environment variable. */ @@ -184,6 +193,24 @@ parse_unsigned_long (const char *name, unsigned long *pvalue, bool allow_zero) return false; } +/* Parse a positive int environment variable. Return true if one was + present and it was successfully parsed. */ + +static bool +parse_int (const char *name, int *pvalue, bool allow_zero) +{ + unsigned long value; + if (!parse_unsigned_long (name, &value, allow_zero)) + return false; + if (value > INT_MAX) + { + gomp_error ("Invalid value for environment variable %s", name); + return false; + } + *pvalue = (int) value; + return true; +} + /* Parse an unsigned long list environment variable. Return true if one was present and it was successfully parsed. 
*/ @@ -273,6 +300,412 @@ parse_unsigned_long_list (const char *name, unsigned long *p1stvalue, return false; } +/* Parse environment variable set to a boolean or list of omp_proc_bind_t + enum values. Return true if one was present and it was successfully + parsed. */ + +static bool +parse_bind_var (const char *name, char *p1stvalue, + char **pvalues, unsigned long *pnvalues) +{ + char *env; + char value, *values = NULL; + int i; + static struct proc_bind_kinds + { + const char name[7]; + const char len; + omp_proc_bind_t kind; + } kinds[] = + { + { "false", 5, omp_proc_bind_false }, + { "true", 4, omp_proc_bind_true }, + { "master", 6, omp_proc_bind_master }, + { "close", 5, omp_proc_bind_close }, + { "spread", 6, omp_proc_bind_spread } + }; + + env = getenv (name); + if (env == NULL) + return false; + + while (isspace ((unsigned char) *env)) + ++env; + if (*env == '\0') + goto invalid; + + for (i = 0; i < 5; i++) + if (strncasecmp (env, kinds[i].name, kinds[i].len) == 0) + { + value = kinds[i].kind; + env += kinds[i].len; + break; + } + if (i == 5) + goto invalid; + + while (isspace ((unsigned char) *env)) + ++env; + if (*env != '\0') + { + if (*env == ',') + { + unsigned long nvalues = 0, nalloced = 0; + + if (value == omp_proc_bind_false + || value == omp_proc_bind_true) + goto invalid; + + do + { + env++; + if (nvalues == nalloced) + { + char *n; + nalloced = nalloced ? 
nalloced * 2 : 16; + n = realloc (values, nalloced); + if (n == NULL) + { + free (values); + gomp_error ("Out of memory while trying to parse" + " environment variable %s", name); + return false; + } + values = n; + if (nvalues == 0) + values[nvalues++] = value; + } + + while (isspace ((unsigned char) *env)) + ++env; + if (*env == '\0') + goto invalid; + + for (i = 2; i < 5; i++) + if (strncasecmp (env, kinds[i].name, kinds[i].len) == 0) + { + value = kinds[i].kind; + env += kinds[i].len; + break; + } + if (i == 5) + goto invalid; + + values[nvalues++] = value; + while (isspace ((unsigned char) *env)) + ++env; + if (*env == '\0') + break; + if (*env != ',') + goto invalid; + } + while (1); + *p1stvalue = values[0]; + *pvalues = values; + *pnvalues = nvalues; + return true; + } + goto invalid; + } + + *p1stvalue = value; + return true; + + invalid: + free (values); + gomp_error ("Invalid value for environment variable %s", name); + return false; +} + +static bool +parse_one_place (char **envp, bool *negatep, unsigned long *lenp, + long *stridep) +{ + char *env = *envp, *start; + void *p = gomp_places_list ? gomp_places_list[gomp_places_list_len] : NULL; + unsigned long len = 1; + long stride = 1; + int pass; + bool any_negate = false; + *negatep = false; + while (isspace ((unsigned char) *env)) + ++env; + if (*env == '!') + { + *negatep = true; + ++env; + while (isspace ((unsigned char) *env)) + ++env; + } + if (*env != '{') + return false; + ++env; + while (isspace ((unsigned char) *env)) + ++env; + start = env; + for (pass = 0; pass < (any_negate ? 
2 : 1); pass++) + { + env = start; + do + { + unsigned long this_num, this_len = 1; + long this_stride = 1; + bool this_negate = (*env == '!'); + if (this_negate) + { + if (gomp_places_list) + any_negate = true; + ++env; + while (isspace ((unsigned char) *env)) + ++env; + } + + errno = 0; + this_num = strtoul (env, &env, 10); + if (errno) + return false; + while (isspace ((unsigned char) *env)) + ++env; + if (*env == ':') + { + ++env; + while (isspace ((unsigned char) *env)) + ++env; + errno = 0; + this_len = strtoul (env, &env, 10); + if (errno || this_len == 0) + return false; + while (isspace ((unsigned char) *env)) + ++env; + if (*env == ':') + { + ++env; + while (isspace ((unsigned char) *env)) + ++env; + errno = 0; + this_stride = strtol (env, &env, 10); + if (errno) + return false; + while (isspace ((unsigned char) *env)) + ++env; + } + } + if (this_negate && this_len != 1) + return false; + if (gomp_places_list && pass == this_negate) + { + if (this_negate) + { + if (!gomp_affinity_remove_cpu (p, this_num)) + return false; + } + else if (!gomp_affinity_add_cpus (p, this_num, this_len, + this_stride, false)) + return false; + } + if (*env == '}') + break; + if (*env != ',') + return false; + ++env; + } + while (1); + } + + ++env; + while (isspace ((unsigned char) *env)) + ++env; + if (*env == ':') + { + ++env; + while (isspace ((unsigned char) *env)) + ++env; + errno = 0; + len = strtoul (env, &env, 10); + if (errno || len == 0 || len >= 65536) + return false; + while (isspace ((unsigned char) *env)) + ++env; + if (*env == ':') + { + ++env; + while (isspace ((unsigned char) *env)) + ++env; + errno = 0; + stride = strtol (env, &env, 10); + if (errno) + return false; + while (isspace ((unsigned char) *env)) + ++env; + } + } + if (*negatep && len != 1) + return false; + *envp = env; + *lenp = len; + *stridep = stride; + return true; +} + +static bool +parse_places_var (const char *name) +{ + char *env = getenv (name), *end; + bool any_negate = false; + int 
level = 0; + unsigned long count = 0; + if (env == NULL) + return false; + + while (isspace ((unsigned char) *env)) + ++env; + if (*env == '\0') + goto invalid; + + if (strncasecmp (env, "threads", 7) == 0) + { + env += 7; + level = 1; + } + else if (strncasecmp (env, "cores", 5) == 0) + { + env += 5; + level = 2; + } + else if (strncasecmp (env, "sockets", 7) == 0) + { + env += 7; + level = 3; + } + if (level) + { + count = ULONG_MAX; + while (isspace ((unsigned char) *env)) + ++env; + if (*env != '\0') + { + if (*env++ != '(') + goto invalid; + while (isspace ((unsigned char) *env)) + ++env; + + errno = 0; + count = strtoul (env, &end, 10); + if (errno) + goto invalid; + env = end; + while (isspace ((unsigned char) *env)) + ++env; + if (*env != ')') + goto invalid; + ++env; + while (isspace ((unsigned char) *env)) + ++env; + if (*env != '\0') + goto invalid; + } + return gomp_affinity_init_level (level, count, false); + } + + count = 0; + end = env; + do + { + bool negate; + unsigned long len; + long stride; + if (!parse_one_place (&end, &negate, &len, &stride)) + goto invalid; + if (negate) + { + if (!any_negate) + count++; + any_negate = true; + } + else + count += len; + if (count > 65536) + goto invalid; + if (*end == '\0') + break; + if (*end != ',') + goto invalid; + end++; + } + while (1); + + if (gomp_global_icv.bind_var == omp_proc_bind_false) + return false; + + gomp_places_list_len = 0; + gomp_places_list = gomp_affinity_alloc (count, false); + if (gomp_places_list == NULL) + return false; + + do + { + bool negate; + unsigned long len; + long stride; + gomp_affinity_init_place (gomp_places_list[gomp_places_list_len]); + if (!parse_one_place (&env, &negate, &len, &stride)) + goto invalid; + if (negate) + { + void *p; + for (count = 0; count < gomp_places_list_len; count++) + if (gomp_affinity_same_place + (gomp_places_list[count], + gomp_places_list[gomp_places_list_len])) + break; + if (count == gomp_places_list_len) + { + gomp_error ("Trying to remove 
a non-existing place from list " + "of places"); + goto invalid; + } + p = gomp_places_list[count]; + memmove (&gomp_places_list[count], + &gomp_places_list[count + 1], + (gomp_places_list_len - count - 1) * sizeof (void *)); + --gomp_places_list_len; + gomp_places_list[gomp_places_list_len] = p; + } + else if (len == 1) + ++gomp_places_list_len; + else + { + for (count = 0; count < len - 1; count++) + if (!gomp_affinity_copy_place + (gomp_places_list[gomp_places_list_len + count + 1], + gomp_places_list[gomp_places_list_len + count], + stride)) + goto invalid; + gomp_places_list_len += len; + } + if (*env == '\0') + break; + env++; + } + while (1); + + if (gomp_places_list_len == 0) + { + gomp_error ("All places have been removed"); + goto invalid; + } + if (!gomp_affinity_finalize_place_list (false)) + goto invalid; + return true; + + invalid: + free (gomp_places_list); + gomp_places_list = NULL; + gomp_places_list_len = 0; + gomp_error ("Invalid value for environment variable %s", name); + return false; +} + /* Parse the OMP_STACKSIZE environment varible. Return true if one was present and it was successfully parsed. 
*/ @@ -480,84 +913,89 @@ parse_wait_policy (void) static bool parse_affinity (void) { - char *env, *end; + char *env, *end, *start; + int pass; unsigned long cpu_beg, cpu_end, cpu_stride; - unsigned short *cpus = NULL; - size_t allocated = 0, used = 0, needed; + size_t count = 0, needed; env = getenv ("GOMP_CPU_AFFINITY"); if (env == NULL) return false; - do + start = env; + for (pass = 0; pass < 2; pass++) { - while (*env == ' ' || *env == '\t') - env++; - - cpu_beg = strtoul (env, &end, 0); - cpu_end = cpu_beg; - cpu_stride = 1; - if (env == end || cpu_beg >= 65536) - goto invalid; - - env = end; - if (*env == '-') + env = start; + if (pass == 1) { - cpu_end = strtoul (++env, &end, 0); - if (env == end || cpu_end >= 65536 || cpu_end < cpu_beg) + gomp_places_list_len = 0; + gomp_places_list = gomp_affinity_alloc (count, true); + if (gomp_places_list == NULL) + return false; + } + do + { + while (isspace ((unsigned char) *env)) + ++env; + + errno = 0; + cpu_beg = strtoul (env, &end, 0); + if (errno || end == env || cpu_beg >= 65536) goto invalid; + cpu_end = cpu_beg; + cpu_stride = 1; env = end; - if (*env == ':') + if (*env == '-') { - cpu_stride = strtoul (++env, &end, 0); - if (env == end || cpu_stride == 0 || cpu_stride >= 65536) + errno = 0; + cpu_end = strtoul (++env, &end, 0); + if (errno || end == env || cpu_end >= 65536 || cpu_end < cpu_beg) goto invalid; env = end; - } - } + if (*env == ':') + { + errno = 0; + cpu_stride = strtoul (++env, &end, 0); + if (errno || cpu_stride == 0 || cpu_stride >= 65536) + goto invalid; - needed = (cpu_end - cpu_beg) / cpu_stride + 1; - if (used + needed >= allocated) - { - unsigned short *new_cpus; + env = end; + } + } - if (allocated < 64) - allocated = 64; - if (allocated > needed) - allocated <<= 1; + needed = (cpu_end - cpu_beg) / cpu_stride + 1; + if (pass == 0) + count += needed; else - allocated += 2 * needed; - new_cpus = realloc (cpus, allocated * sizeof (unsigned short)); - if (new_cpus == NULL) { - free (cpus); - gomp_error ("not enough 
memory to store GOMP_CPU_AFFINITY list"); - return false; + while (needed--) + { + void *p = gomp_places_list[gomp_places_list_len]; + gomp_affinity_init_place (p); + if (gomp_affinity_add_cpus (p, cpu_beg, 1, 0, true)) + ++gomp_places_list_len; + cpu_beg += cpu_stride; + } } - cpus = new_cpus; - } + while (isspace ((unsigned char) *env)) + ++env; - while (needed--) - { - cpus[used++] = cpu_beg; - cpu_beg += cpu_stride; + if (*env == ',') + env++; + else if (*env == '\0') + break; } - - while (*env == ' ' || *env == '\t') - env++; - - if (*env == ',') - env++; - else if (*env == '\0') - break; + while (1); } - while (1); - gomp_cpu_affinity = cpus; - gomp_cpu_affinity_len = used; + if (gomp_places_list_len == 0) + { + free (gomp_places_list); + gomp_places_list = NULL; + } return true; invalid: @@ -565,12 +1003,160 @@ parse_affinity (void) return false; } + +static void +handle_omp_display_env (unsigned long stacksize, int wait_policy) +{ + const char *env; + bool display = false; + bool verbose = false; + int i; + + env = getenv ("OMP_DISPLAY_ENV"); + if (env == NULL) + return; + + while (isspace ((unsigned char) *env)) + ++env; + if (strncasecmp (env, "true", 4) == 0) + { + display = true; + env += 4; + } + else if (strncasecmp (env, "false", 5) == 0) + { + display = false; + env += 5; + } + else if (strncasecmp (env, "verbose", 7) == 0) + { + display = true; + verbose = true; + env += 7; + } + else + env = "X"; + while (isspace ((unsigned char) *env)) + ++env; + if (*env != '\0') + gomp_error ("Invalid value for environment variable OMP_DISPLAY_ENV"); + + if (!display) + return; + + fputs ("\nOPENMP DISPLAY ENVIRONMENT BEGIN\n", stderr); + + fputs (" _OPENMP = '201307'\n", stderr); + fprintf (stderr, " OMP_DYNAMIC = '%s'\n", + gomp_global_icv.dyn_var ? "TRUE" : "FALSE"); + fprintf (stderr, " OMP_NESTED = '%s'\n", + gomp_global_icv.nest_var ? 
"TRUE" : "FALSE"); + + fprintf (stderr, " OMP_NUM_THREADS = '%lu", gomp_global_icv.nthreads_var); + for (i = 1; i < gomp_nthreads_var_list_len; i++) + fprintf (stderr, ",%lu", gomp_nthreads_var_list[i]); + fputs ("'\n", stderr); + + fprintf (stderr, " OMP_SCHEDULE = '"); + switch (gomp_global_icv.run_sched_var) + { + case GFS_RUNTIME: + fputs ("RUNTIME", stderr); + break; + case GFS_STATIC: + fputs ("STATIC", stderr); + break; + case GFS_DYNAMIC: + fputs ("DYNAMIC", stderr); + break; + case GFS_GUIDED: + fputs ("GUIDED", stderr); + break; + case GFS_AUTO: + fputs ("AUTO", stderr); + break; + } + fputs ("'\n", stderr); + + fputs (" OMP_PROC_BIND = '", stderr); + switch (gomp_global_icv.bind_var) + { + case omp_proc_bind_false: + fputs ("FALSE", stderr); + break; + case omp_proc_bind_true: + fputs ("TRUE", stderr); + break; + case omp_proc_bind_master: + fputs ("MASTER", stderr); + break; + case omp_proc_bind_close: + fputs ("CLOSE", stderr); + break; + case omp_proc_bind_spread: + fputs ("SPREAD", stderr); + break; + } + for (i = 1; i < gomp_bind_var_list_len; i++) + switch (gomp_bind_var_list[i]) + { + case omp_proc_bind_master: + fputs (",MASTER", stderr); + break; + case omp_proc_bind_close: + fputs (",CLOSE", stderr); + break; + case omp_proc_bind_spread: + fputs (",SPREAD", stderr); + break; + } + fputs ("'\n", stderr); + fputs (" OMP_PLACES = '", stderr); + for (i = 0; i < gomp_places_list_len; i++) + { + fputs ("{", stderr); + gomp_affinity_print_place (gomp_places_list[i]); + fputs (i + 1 == gomp_places_list_len ? "}" : "},", stderr); + } + fputs ("'\n", stderr); + + fprintf (stderr, " OMP_STACKSIZE = '%lu'\n", stacksize); + + /* GOMP's default value is actually neither active nor passive. */ + fprintf (stderr, " OMP_WAIT_POLICY = '%s'\n", + wait_policy > 0 ? 
"ACTIVE" : "PASSIVE"); + fprintf (stderr, " OMP_THREAD_LIMIT = '%u'\n", + gomp_global_icv.thread_limit_var); + fprintf (stderr, " OMP_MAX_ACTIVE_LEVELS = '%lu'\n", + gomp_max_active_levels_var); + + fprintf (stderr, " OMP_CANCELLATION = '%s'\n", + gomp_cancel_var ? "TRUE" : "FALSE"); + fprintf (stderr, " OMP_DEFAULT_DEVICE = '%d'\n", + gomp_global_icv.default_device_var); + + if (verbose) + { + fputs (" GOMP_CPU_AFFINITY = ''\n", stderr); + fprintf (stderr, " GOMP_STACKSIZE = '%lu'\n", stacksize); +#ifdef HAVE_INTTYPES_H + fprintf (stderr, " GOMP_SPINCOUNT = '%"PRIu64"'\n", + (uint64_t) gomp_spin_count_var); +#else + fprintf (stderr, " GOMP_SPINCOUNT = '%lu'\n", + (unsigned long) gomp_spin_count_var); +#endif + } + + fputs ("OPENMP DISPLAY ENVIRONMENT END\n", stderr); +} + + static void __attribute__((constructor)) initialize_env (void) { - unsigned long stacksize; + unsigned long thread_limit_var, stacksize; int wait_policy; - bool bind_var = false; /* Do a compile time check that mkomp_h.pl did good job. */ omp_check_defines (); @@ -578,14 +1164,17 @@ initialize_env (void) parse_schedule (); parse_boolean ("OMP_DYNAMIC", &gomp_global_icv.dyn_var); parse_boolean ("OMP_NESTED", &gomp_global_icv.nest_var); - parse_boolean ("OMP_PROC_BIND", &bind_var); + parse_boolean ("OMP_CANCELLATION", &gomp_cancel_var); + parse_int ("OMP_DEFAULT_DEVICE", &gomp_global_icv.default_device_var, true); parse_unsigned_long ("OMP_MAX_ACTIVE_LEVELS", &gomp_max_active_levels_var, true); - parse_unsigned_long ("OMP_THREAD_LIMIT", &gomp_thread_limit_var, false); - if (gomp_thread_limit_var != ULONG_MAX) - gomp_remaining_threads_count = gomp_thread_limit_var - 1; + if (parse_unsigned_long ("OMP_THREAD_LIMIT", &thread_limit_var, false)) + { + gomp_global_icv.thread_limit_var + = thread_limit_var > INT_MAX ? 
UINT_MAX : thread_limit_var; + } #ifndef HAVE_SYNC_BUILTINS - gomp_mutex_init (&gomp_remaining_threads_lock); + gomp_mutex_init (&gomp_managed_threads_lock); #endif gomp_init_num_threads (); gomp_available_cpus = gomp_global_icv.nthreads_var; @@ -594,7 +1183,14 @@ initialize_env (void) &gomp_nthreads_var_list, &gomp_nthreads_var_list_len)) gomp_global_icv.nthreads_var = gomp_available_cpus; - if (parse_affinity () || bind_var) + if (!parse_bind_var ("OMP_PROC_BIND", + &gomp_global_icv.bind_var, + &gomp_bind_var_list, + &gomp_bind_var_list_len)) + gomp_global_icv.bind_var = omp_proc_bind_false; + if (parse_places_var ("OMP_PLACES") + || parse_affinity () + || gomp_global_icv.bind_var) gomp_init_affinity (); wait_policy = parse_wait_policy (); if (!parse_spincount ("GOMP_SPINCOUNT", &gomp_spin_count_var)) @@ -645,6 +1241,8 @@ initialize_env (void) if (err != 0) gomp_error ("Stack size change failed: %s", strerror (err)); } + + handle_omp_display_env (stacksize, wait_policy); } @@ -728,7 +1326,8 @@ omp_get_max_threads (void) int omp_get_thread_limit (void) { - return gomp_thread_limit_var > INT_MAX ? INT_MAX : gomp_thread_limit_var; + struct gomp_task_icv *icv = gomp_icv (false); + return icv->thread_limit_var > INT_MAX ? INT_MAX : icv->thread_limit_var; } void @@ -744,6 +1343,60 @@ omp_get_max_active_levels (void) return gomp_max_active_levels_var; } +int +omp_get_cancellation (void) +{ + return gomp_cancel_var; +} + +omp_proc_bind_t +omp_get_proc_bind (void) +{ + struct gomp_task_icv *icv = gomp_icv (false); + return icv->bind_var; +} + +void +omp_set_default_device (int device_num) +{ + struct gomp_task_icv *icv = gomp_icv (true); + icv->default_device_var = device_num >= 0 ? 
device_num : 0; +} + +int +omp_get_default_device (void) +{ + struct gomp_task_icv *icv = gomp_icv (false); + return icv->default_device_var; +} + +int +omp_get_num_devices (void) +{ + return gomp_get_num_devices (); +} + +int +omp_get_num_teams (void) +{ + /* Hardcoded to 1 on host, MIC, HSAIL? Maybe variable on PTX. */ + return 1; +} + +int +omp_get_team_num (void) +{ + /* Hardcoded to 0 on host, MIC, HSAIL? Maybe variable on PTX. */ + return 0; +} + +int +omp_is_initial_device (void) +{ + /* Hardcoded to 1 on host, should be 0 on MIC, HSAIL, PTX. */ + return 1; +} + ialias (omp_set_dynamic) ialias (omp_set_nested) ialias (omp_set_num_threads) @@ -755,3 +1408,11 @@ ialias (omp_get_max_threads) ialias (omp_get_thread_limit) ialias (omp_set_max_active_levels) ialias (omp_get_max_active_levels) +ialias (omp_get_cancellation) +ialias (omp_get_proc_bind) +ialias (omp_set_default_device) +ialias (omp_get_default_device) +ialias (omp_get_num_devices) +ialias (omp_get_num_teams) +ialias (omp_get_team_num) +ialias (omp_is_initial_device) diff --git a/libgomp/fortran.c b/libgomp/fortran.c index 3a4a42a..38b968a 100644 --- a/libgomp/fortran.c +++ b/libgomp/fortran.c @@ -31,11 +31,6 @@ #ifdef HAVE_ATTRIBUTE_ALIAS /* Use internal aliases if possible. 
*/ -# define ULP STR1(__USER_LABEL_PREFIX__) -# define STR1(x) STR2(x) -# define STR2(x) #x -# define ialias_redirect(fn) \ - extern __typeof (fn) fn __asm__ (ULP "gomp_ialias_" #fn) attribute_hidden; # ifndef LIBGOMP_GNU_SYMBOL_VERSIONING ialias_redirect (omp_init_lock) ialias_redirect (omp_init_nest_lock) @@ -70,6 +65,14 @@ ialias_redirect (omp_get_ancestor_thread_num) ialias_redirect (omp_get_team_size) ialias_redirect (omp_get_active_level) ialias_redirect (omp_in_final) +ialias_redirect (omp_get_cancellation) +ialias_redirect (omp_get_proc_bind) +ialias_redirect (omp_set_default_device) +ialias_redirect (omp_get_default_device) +ialias_redirect (omp_get_num_devices) +ialias_redirect (omp_get_num_teams) +ialias_redirect (omp_get_team_num) +ialias_redirect (omp_is_initial_device) #endif #ifndef LIBGOMP_GNU_SYMBOL_VERSIONING @@ -435,3 +438,57 @@ omp_in_final_ (void) { return omp_in_final (); } + +int32_t +omp_get_cancellation_ (void) +{ + return omp_get_cancellation (); +} + +int32_t +omp_get_proc_bind_ (void) +{ + return omp_get_proc_bind (); +} + +void +omp_set_default_device_ (const int32_t *device_num) +{ + omp_set_default_device (*device_num); +} + +void +omp_set_default_device_8_ (const int64_t *device_num) +{ + omp_set_default_device (TO_INT (*device_num)); +} + +int32_t +omp_get_default_device_ (void) +{ + return omp_get_default_device (); +} + +int32_t +omp_get_num_devices_ (void) +{ + return omp_get_num_devices (); +} + +int32_t +omp_get_num_teams_ (void) +{ + return omp_get_num_teams (); +} + +int32_t +omp_get_team_num_ (void) +{ + return omp_get_team_num (); +} + +int32_t +omp_is_initial_device_ (void) +{ + return omp_is_initial_device (); +} diff --git a/libgomp/hashtab.h b/libgomp/hashtab.h new file mode 100644 index 0000000..7f1dad6 --- /dev/null +++ b/libgomp/hashtab.h @@ -0,0 +1,443 @@ +/* An expandable hash tables datatype. + Copyright (C) 1999-2013 + Free Software Foundation, Inc. 
+ Contributed by Vladimir Makarov <vmakarov@cygnus.com>. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. */ + +/* The hash table code copied from include/hashtab.[hc] and adjusted, + so that the hash table entries are in the flexible array at the end + of the control structure, no callbacks are used and the elements in the + table are of the hash_entry_type type. + Before including this file, define hash_entry_type type and + htab_alloc and htab_free functions. After including it, define + htab_hash and htab_eq inline functions. */ + +/* This package implements basic hash table functionality. It is possible + to search for an entry, create an entry and destroy an entry. + + Elements in the table are generic pointers. + + The size of the table is not fixed; if the occupancy of the table + grows too high the hash table will be expanded. + + The abstract data implementation is based on generalized Algorithm D + from Knuth's book "The art of computer programming". Hash table is + expanded by creation of new hash table and transferring elements from + the old table to the new table. */ + +/* The type for a hash code. */ +typedef unsigned int hashval_t; + +static inline hashval_t htab_hash (hash_entry_type); +static inline bool htab_eq (hash_entry_type, hash_entry_type); + +/* This macro defines reserved value for empty table entry. 
*/ + +#define HTAB_EMPTY_ENTRY ((hash_entry_type) 0) + +/* This macro defines reserved value for table entry which contained + a deleted element. */ + +#define HTAB_DELETED_ENTRY ((hash_entry_type) 1) + +/* Hash tables are of the following type. The structure + (implementation) of this type is not needed for using the hash + tables. All work with hash table should be executed only through + functions mentioned below. The size of this structure is subject to + change. */ + +struct htab { + /* Current size (in entries) of the hash table. */ + size_t size; + + /* Current number of elements including also deleted elements. */ + size_t n_elements; + + /* Current number of deleted elements in the table. */ + size_t n_deleted; + + /* Current size (in entries) of the hash table, as an index into the + table of primes. */ + unsigned int size_prime_index; + + /* Table itself. */ + hash_entry_type entries[]; +}; + +typedef struct htab *htab_t; + +/* An enum saying whether we insert into the hash table or not. */ +enum insert_option {NO_INSERT, INSERT}; + +/* Table of primes and multiplicative inverses. + + Note that these are not minimally reduced inverses. Unlike when generating + code to divide by a constant, we want to be able to use the same algorithm + all the time. All of these inverses (are implied to) have bit 32 set. + + For the record, the function that computed the table is in + libiberty/hashtab.c. 
*/ + +struct prime_ent +{ + hashval_t prime; + hashval_t inv; + hashval_t inv_m2; /* inverse of prime-2 */ + hashval_t shift; +}; + +static struct prime_ent const prime_tab[] = { + { 7, 0x24924925, 0x9999999b, 2 }, + { 13, 0x3b13b13c, 0x745d1747, 3 }, + { 31, 0x08421085, 0x1a7b9612, 4 }, + { 61, 0x0c9714fc, 0x15b1e5f8, 5 }, + { 127, 0x02040811, 0x0624dd30, 6 }, + { 251, 0x05197f7e, 0x073260a5, 7 }, + { 509, 0x01824366, 0x02864fc8, 8 }, + { 1021, 0x00c0906d, 0x014191f7, 9 }, + { 2039, 0x0121456f, 0x0161e69e, 10 }, + { 4093, 0x00300902, 0x00501908, 11 }, + { 8191, 0x00080041, 0x00180241, 12 }, + { 16381, 0x000c0091, 0x00140191, 13 }, + { 32749, 0x002605a5, 0x002a06e6, 14 }, + { 65521, 0x000f00e2, 0x00110122, 15 }, + { 131071, 0x00008001, 0x00018003, 16 }, + { 262139, 0x00014002, 0x0001c004, 17 }, + { 524287, 0x00002001, 0x00006001, 18 }, + { 1048573, 0x00003001, 0x00005001, 19 }, + { 2097143, 0x00004801, 0x00005801, 20 }, + { 4194301, 0x00000c01, 0x00001401, 21 }, + { 8388593, 0x00001e01, 0x00002201, 22 }, + { 16777213, 0x00000301, 0x00000501, 23 }, + { 33554393, 0x00001381, 0x00001481, 24 }, + { 67108859, 0x00000141, 0x000001c1, 25 }, + { 134217689, 0x000004e1, 0x00000521, 26 }, + { 268435399, 0x00000391, 0x000003b1, 27 }, + { 536870909, 0x00000019, 0x00000029, 28 }, + { 1073741789, 0x0000008d, 0x00000095, 29 }, + { 2147483647, 0x00000003, 0x00000007, 30 }, + /* Avoid "decimal constant so large it is unsigned" for 4294967291. */ + { 0xfffffffb, 0x00000006, 0x00000008, 31 } +}; + +/* The following function returns an index into the above table of the + nearest prime number which is greater than N, and near a power of two. */ + +static unsigned int +higher_prime_index (unsigned long n) +{ + unsigned int low = 0; + unsigned int high = sizeof(prime_tab) / sizeof(prime_tab[0]); + + while (low != high) + { + unsigned int mid = low + (high - low) / 2; + if (n > prime_tab[mid].prime) + low = mid + 1; + else + high = mid; + } + + /* If we've run out of primes, abort. 
*/ + if (low == sizeof(prime_tab) / sizeof(prime_tab[0])) + abort (); + + return low; +} + +/* Return the current size of given hash table. */ + +static inline size_t +htab_size (htab_t htab) +{ + return htab->size; +} + +/* Return the current number of elements in given hash table. */ + +static inline size_t +htab_elements (htab_t htab) +{ + return htab->n_elements - htab->n_deleted; +} + +/* Return X % Y. */ + +static inline hashval_t +htab_mod_1 (hashval_t x, hashval_t y, hashval_t inv, int shift) +{ + /* The multiplicative inverses computed above are for 32-bit types, and + requires that we be able to compute a highpart multiply. */ + if (sizeof (hashval_t) * __CHAR_BIT__ <= 32) + { + hashval_t t1, t2, t3, t4, q, r; + + t1 = ((unsigned long long)x * inv) >> 32; + t2 = x - t1; + t3 = t2 >> 1; + t4 = t1 + t3; + q = t4 >> shift; + r = x - (q * y); + + return r; + } + + /* Otherwise just use the native division routines. */ + return x % y; +} + +/* Compute the primary hash for HASH given HTAB's current size. */ + +static inline hashval_t +htab_mod (hashval_t hash, htab_t htab) +{ + const struct prime_ent *p = &prime_tab[htab->size_prime_index]; + return htab_mod_1 (hash, p->prime, p->inv, p->shift); +} + +/* Compute the secondary hash for HASH given HTAB's current size. */ + +static inline hashval_t +htab_mod_m2 (hashval_t hash, htab_t htab) +{ + const struct prime_ent *p = &prime_tab[htab->size_prime_index]; + return 1 + htab_mod_1 (hash, p->prime - 2, p->inv_m2, p->shift); +} + +/* Create hash table of size SIZE. 
*/ + +static htab_t +htab_create (size_t size) +{ + htab_t result; + unsigned int size_prime_index; + + size_prime_index = higher_prime_index (size); + size = prime_tab[size_prime_index].prime; + + result = (htab_t) htab_alloc (sizeof (struct htab) + + size * sizeof (hash_entry_type)); + result->size = size; + result->n_elements = 0; + result->n_deleted = 0; + result->size_prime_index = size_prime_index; + memset (result->entries, 0, size * sizeof (hash_entry_type)); + return result; +} + +/* Similar to htab_find_slot, but without several unwanted side effects: + - Does not call htab_eq when it finds an existing entry. + - Does not change the count of elements in the hash table. + This function also assumes there are no deleted entries in the table. + HASH is the hash value for the element to be inserted. */ + +static hash_entry_type * +find_empty_slot_for_expand (htab_t htab, hashval_t hash) +{ + hashval_t index = htab_mod (hash, htab); + size_t size = htab_size (htab); + hash_entry_type *slot = htab->entries + index; + hashval_t hash2; + + if (*slot == HTAB_EMPTY_ENTRY) + return slot; + else if (*slot == HTAB_DELETED_ENTRY) + abort (); + + hash2 = htab_mod_m2 (hash, htab); + for (;;) + { + index += hash2; + if (index >= size) + index -= size; + + slot = htab->entries + index; + if (*slot == HTAB_EMPTY_ENTRY) + return slot; + else if (*slot == HTAB_DELETED_ENTRY) + abort (); + } +} + +/* The following function changes size of memory allocated for the + entries and repeatedly inserts the table elements. The occupancy + of the table after the call will be about 50%. Naturally the hash + table must already exist. Remember also that the place of the + table entries is changed. 
*/ + +static htab_t +htab_expand (htab_t htab) +{ + htab_t nhtab; + hash_entry_type *olimit; + hash_entry_type *p; + size_t osize, elts; + + osize = htab->size; + olimit = htab->entries + osize; + elts = htab_elements (htab); + + /* Resize only when table after removal of unused elements is either + too full or too empty. */ + if (elts * 2 > osize || (elts * 8 < osize && osize > 32)) + nhtab = htab_create (elts * 2); + else + nhtab = htab_create (osize - 1); + nhtab->n_elements = htab->n_elements - htab->n_deleted; + + p = htab->entries; + do + { + hash_entry_type x = *p; + + if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY) + *find_empty_slot_for_expand (nhtab, htab_hash (x)) = x; + + p++; + } + while (p < olimit); + + htab_free (htab); + return nhtab; +} + +/* This function searches for a hash table entry equal to the given + element. It cannot be used to insert or delete an element. */ + +static hash_entry_type +htab_find (htab_t htab, const hash_entry_type element) +{ + hashval_t index, hash2, hash = htab_hash (element); + size_t size; + hash_entry_type entry; + + size = htab_size (htab); + index = htab_mod (hash, htab); + + entry = htab->entries[index]; + if (entry == HTAB_EMPTY_ENTRY + || (entry != HTAB_DELETED_ENTRY && htab_eq (entry, element))) + return entry; + + hash2 = htab_mod_m2 (hash, htab); + for (;;) + { + index += hash2; + if (index >= size) + index -= size; + + entry = htab->entries[index]; + if (entry == HTAB_EMPTY_ENTRY + || (entry != HTAB_DELETED_ENTRY && htab_eq (entry, element))) + return entry; + } +} + +/* This function searches for a hash table slot containing an entry + equal to the given element. To delete an entry, call this with + insert=NO_INSERT, then call htab_clear_slot on the slot returned + (possibly after doing some checks). To insert an entry, call this + with insert=INSERT, then write the value you want into the returned + slot. 
*/ + +static hash_entry_type * +htab_find_slot (htab_t *htabp, const hash_entry_type element, + enum insert_option insert) +{ + hash_entry_type *first_deleted_slot; + hashval_t index, hash2, hash = htab_hash (element); + size_t size; + hash_entry_type entry; + htab_t htab = *htabp; + + size = htab_size (htab); + if (insert == INSERT && size * 3 <= htab->n_elements * 4) + { + htab = *htabp = htab_expand (htab); + size = htab_size (htab); + } + + index = htab_mod (hash, htab); + + first_deleted_slot = NULL; + + entry = htab->entries[index]; + if (entry == HTAB_EMPTY_ENTRY) + goto empty_entry; + else if (entry == HTAB_DELETED_ENTRY) + first_deleted_slot = &htab->entries[index]; + else if (htab_eq (entry, element)) + return &htab->entries[index]; + + hash2 = htab_mod_m2 (hash, htab); + for (;;) + { + index += hash2; + if (index >= size) + index -= size; + + entry = htab->entries[index]; + if (entry == HTAB_EMPTY_ENTRY) + goto empty_entry; + else if (entry == HTAB_DELETED_ENTRY) + { + if (!first_deleted_slot) + first_deleted_slot = &htab->entries[index]; + } + else if (htab_eq (entry, element)) + return &htab->entries[index]; + } + + empty_entry: + if (insert == NO_INSERT) + return NULL; + + if (first_deleted_slot) + { + htab->n_deleted--; + *first_deleted_slot = HTAB_EMPTY_ENTRY; + return first_deleted_slot; + } + + htab->n_elements++; + return &htab->entries[index]; +} + +/* This function clears a specified slot in a hash table. It is + useful when you've already done the lookup and don't want to do it + again. */ + +static inline void +htab_clear_slot (htab_t htab, hash_entry_type *slot) +{ + if (slot < htab->entries || slot >= htab->entries + htab_size (htab) + || *slot == HTAB_EMPTY_ENTRY || *slot == HTAB_DELETED_ENTRY) + abort (); + + *slot = HTAB_DELETED_ENTRY; + htab->n_deleted++; +} + +/* Returns a hash code for pointer P. 
Simplified version of evahash */ + +static inline hashval_t +hash_pointer (const void *p) +{ + uintptr_t v = (uintptr_t) p; + if (sizeof (v) > sizeof (hashval_t)) + v ^= v >> (sizeof (uintptr_t) / 2 * __CHAR_BIT__); + return v; +} diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index 322a435..bdc0486 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -39,6 +39,7 @@ #include <pthread.h> #include <stdbool.h> +#include <stdlib.h> #ifdef HAVE_ATTRIBUTE_VISIBILITY # pragma GCC visibility push(hidden) @@ -201,6 +202,10 @@ struct gomp_team_state /* Active nesting level. Only active parallel regions are counted. */ unsigned active_level; + /* Place-partition-var, offset and length into gomp_places_list array. */ + unsigned place_partition_off; + unsigned place_partition_len; + #ifdef HAVE_SYNC_BUILTINS /* Number of single stmts encountered. */ unsigned long single_count; @@ -214,30 +219,40 @@ struct gomp_team_state unsigned long static_trip; }; -/* These are the OpenMP 3.0 Internal Control Variables described in +struct target_mem_desc; + +/* These are the OpenMP 4.0 Internal Control Variables described in section 2.3.1. Those described as having one copy per task are stored within the structure; those described as having one copy for the whole program are (naturally) global variables. */ - + struct gomp_task_icv { unsigned long nthreads_var; enum gomp_schedule_type run_sched_var; int run_sched_modifier; + int default_device_var; + unsigned int thread_limit_var; bool dyn_var; bool nest_var; + char bind_var; + /* Internal ICV. 
*/ + struct target_mem_desc *target_data; }; extern struct gomp_task_icv gomp_global_icv; -extern unsigned long gomp_thread_limit_var; -extern unsigned long gomp_remaining_threads_count; #ifndef HAVE_SYNC_BUILTINS -extern gomp_mutex_t gomp_remaining_threads_lock; +extern gomp_mutex_t gomp_managed_threads_lock; #endif extern unsigned long gomp_max_active_levels_var; +extern bool gomp_cancel_var; extern unsigned long long gomp_spin_count_var, gomp_throttled_spin_count_var; extern unsigned long gomp_available_cpus, gomp_managed_threads; extern unsigned long *gomp_nthreads_var_list, gomp_nthreads_var_list_len; +extern char *gomp_bind_var_list; +extern unsigned long gomp_bind_var_list_len; +extern void **gomp_places_list; +extern unsigned long gomp_places_list_len; enum gomp_task_kind { @@ -247,6 +262,27 @@ enum gomp_task_kind GOMP_TASK_TIED }; +struct gomp_task; +struct gomp_taskgroup; +struct htab; + +struct gomp_task_depend_entry +{ + void *addr; + struct gomp_task_depend_entry *next; + struct gomp_task_depend_entry *prev; + struct gomp_task *task; + bool is_in; + bool redundant; +}; + +struct gomp_dependers_vec +{ + size_t n_elem; + size_t allocated; + struct gomp_task *elem[]; +}; + /* This structure describes a "task" to be run by a thread. 
*/ struct gomp_task @@ -257,6 +293,13 @@ struct gomp_task struct gomp_task *prev_child; struct gomp_task *next_queue; struct gomp_task *prev_queue; + struct gomp_task *next_taskgroup; + struct gomp_task *prev_taskgroup; + struct gomp_taskgroup *taskgroup; + struct gomp_dependers_vec *dependers; + struct htab *depend_hash; + size_t depend_count; + size_t num_dependees; struct gomp_task_icv icv; void (*fn) (void *); void *fn_data; @@ -264,7 +307,19 @@ struct gomp_task bool in_taskwait; bool in_tied_task; bool final_task; + bool copy_ctors_done; gomp_sem_t taskwait_sem; + struct gomp_task_depend_entry depend[]; +}; + +struct gomp_taskgroup +{ + struct gomp_taskgroup *prev; + struct gomp_task *children; + bool in_taskgroup_wait; + bool cancelled; + gomp_sem_t taskgroup_sem; + size_t num_children; }; /* This structure describes a "team" of threads. These are the threads @@ -293,6 +348,12 @@ struct gomp_team of the threads in the team. */ gomp_sem_t **ordered_release; + /* List of work shares on which gomp_fini_work_share hasn't been + called yet. If the team hasn't been cancelled, this should be + equal to each thr->ts.work_share, but otherwise it can be a possibly + long list of workshares. */ + struct gomp_work_share *work_shares_to_free; + /* List of gomp_work_share structs chained through next_free fields. This is populated and taken off only by the first thread in the team encountering a new work sharing construct, in a critical @@ -324,8 +385,20 @@ struct gomp_team gomp_mutex_t task_lock; struct gomp_task *task_queue; - int task_count; - int task_running_count; + /* Number of all GOMP_TASK_{WAITING,TIED} tasks in the team. */ + unsigned int task_count; + /* Number of GOMP_TASK_WAITING tasks currently waiting to be scheduled. */ + unsigned int task_queued_count; + /* Number of GOMP_TASK_{WAITING,TIED} tasks currently running + directly in gomp_barrier_handle_tasks; tasks spawned + from e.g. 
GOMP_taskwait or GOMP_taskgroup_end don't count, even when + that is called from a task run from gomp_barrier_handle_tasks. + task_running_count should be always <= team->nthreads, + and if current task isn't in_tied_task, then it will be + even < team->nthreads. */ + unsigned int task_running_count; + int work_share_cancelled; + int team_cancelled; /* This array contains structures for implicit tasks. */ struct gomp_task implicit_task[]; @@ -350,7 +423,11 @@ struct gomp_thread /* This semaphore is used for ordered loops. */ gomp_sem_t release; - /* user pthread thread pool */ + /* Place this thread is bound to plus one, or zero if not bound + to any place. */ + unsigned int place; + + /* User pthread thread pool */ struct gomp_thread_pool *thread_pool; }; @@ -363,11 +440,23 @@ struct gomp_thread_pool unsigned threads_size; unsigned threads_used; struct gomp_team *last_team; + /* Number of threads running in this contention group. */ + unsigned long threads_busy; /* This barrier holds and releases threads waiting in threads. */ gomp_barrier_t threads_dock; }; +enum gomp_cancel_kind +{ + GOMP_CANCEL_PARALLEL = 1, + GOMP_CANCEL_LOOP = 2, + GOMP_CANCEL_FOR = GOMP_CANCEL_LOOP, + GOMP_CANCEL_DO = GOMP_CANCEL_LOOP, + GOMP_CANCEL_SECTIONS = 4, + GOMP_CANCEL_TASKGROUP = 8 +}; + /* ... and here is that TLS data. */ #ifdef HAVE_TLS @@ -402,17 +491,22 @@ static inline struct gomp_task_icv *gomp_icv (bool write) /* The attributes to be used during thread creation. */ extern pthread_attr_t gomp_thread_attr; -/* Other variables. */ - -extern unsigned short *gomp_cpu_affinity; -extern size_t gomp_cpu_affinity_len; - /* Function prototypes. 
*/ /* affinity.c */ extern void gomp_init_affinity (void); -extern void gomp_init_thread_affinity (pthread_attr_t *); +extern void gomp_init_thread_affinity (pthread_attr_t *, unsigned int); +extern void **gomp_affinity_alloc (unsigned long, bool); +extern void gomp_affinity_init_place (void *); +extern bool gomp_affinity_add_cpus (void *, unsigned long, unsigned long, + long, bool); +extern bool gomp_affinity_remove_cpu (void *, unsigned long); +extern bool gomp_affinity_copy_place (void *, void *, long); +extern bool gomp_affinity_same_place (void *, void *); +extern bool gomp_affinity_finalize_place_list (bool); +extern bool gomp_affinity_init_level (int, unsigned long, bool); +extern void gomp_affinity_print_place (void *); /* alloc.c */ @@ -486,6 +580,8 @@ extern void gomp_barrier_handle_tasks (gomp_barrier_state_t); static void inline gomp_finish_task (struct gomp_task *task) { + if (__builtin_expect (task->depend_hash != NULL, 0)) + free (task->depend_hash); gomp_sem_destroy (&task->taskwait_sem); } @@ -493,8 +589,13 @@ gomp_finish_task (struct gomp_task *task) extern struct gomp_team *gomp_new_team (unsigned); extern void gomp_team_start (void (*) (void *), void *, unsigned, - struct gomp_team *); + unsigned, struct gomp_team *); extern void gomp_team_end (void); +extern void gomp_free_thread (void *); + +/* target.c */ + +extern int gomp_get_num_devices (void); /* work.c */ @@ -502,6 +603,7 @@ extern void gomp_init_work_share (struct gomp_work_share *, bool, unsigned); extern void gomp_fini_work_share (struct gomp_work_share *); extern bool gomp_work_share_start (bool); extern void gomp_work_share_end (void); +extern bool gomp_work_share_end_cancel (void); extern void gomp_work_share_end_nowait (void); static inline void @@ -580,11 +682,19 @@ extern int gomp_test_nest_lock_25 (omp_nest_lock_25_t *) __GOMP_NOTHROW; #endif #ifdef HAVE_ATTRIBUTE_ALIAS +# define ialias_ulp ialias_str1(__USER_LABEL_PREFIX__) +# define ialias_str1(x) ialias_str2(x) +# define 
ialias_str2(x) #x # define ialias(fn) \ extern __typeof (fn) gomp_ialias_##fn \ __attribute__ ((alias (#fn))) attribute_hidden; +# define ialias_redirect(fn) \ + extern __typeof (fn) fn __asm__ (ialias_ulp "gomp_ialias_" #fn) attribute_hidden; +# define ialias_call(fn) gomp_ialias_ ## fn #else # define ialias(fn) +# define ialias_redirect(fn) +# define ialias_call(fn) fn #endif #endif /* LIBGOMP_H */ diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map index 7b051f9..b102fd8 100644 --- a/libgomp/libgomp.map +++ b/libgomp/libgomp.map @@ -113,6 +113,27 @@ OMP_3.1 { omp_in_final_; } OMP_3.0; +OMP_4.0 { + global: + omp_get_cancellation; + omp_get_cancellation_; + omp_get_proc_bind; + omp_get_proc_bind_; + omp_set_default_device; + omp_set_default_device_; + omp_set_default_device_8_; + omp_get_default_device; + omp_get_default_device_; + omp_get_num_devices; + omp_get_num_devices_; + omp_get_num_teams; + omp_get_num_teams_; + omp_get_team_num; + omp_get_team_num_; + omp_is_initial_device; + omp_is_initial_device_; +} OMP_3.1; + GOMP_1.0 { global: GOMP_atomic_end; @@ -184,3 +205,25 @@ GOMP_3.0 { global: GOMP_taskyield; } GOMP_2.0; + +GOMP_4.0 { + global: + GOMP_barrier_cancel; + GOMP_cancel; + GOMP_cancellation_point; + GOMP_loop_end_cancel; + GOMP_parallel_loop_dynamic; + GOMP_parallel_loop_guided; + GOMP_parallel_loop_runtime; + GOMP_parallel_loop_static; + GOMP_parallel_sections; + GOMP_parallel; + GOMP_sections_end_cancel; + GOMP_taskgroup_start; + GOMP_taskgroup_end; + GOMP_target; + GOMP_target_data; + GOMP_target_end_data; + GOMP_target_update; + GOMP_teams; +} GOMP_3.0; diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi index 2985128..a6930cc 100644 --- a/libgomp/libgomp.texi +++ b/libgomp/libgomp.texi @@ -1083,12 +1083,9 @@ guaranteed not to change during the execution of the program. 
@node Environment Variables @chapter Environment Variables -The variables @env{OMP_DYNAMIC}, @env{OMP_MAX_ACTIVE_LEVELS}, -@env{OMP_NESTED}, @env{OMP_NUM_THREADS}, @env{OMP_SCHEDULE}, -@env{OMP_STACKSIZE},@env{OMP_THREAD_LIMIT} and @env{OMP_WAIT_POLICY} -are defined by section 4 of the OpenMP specifications in version 3.1, -while @env{GOMP_CPU_AFFINITY} and @env{GOMP_STACKSIZE} are GNU -extensions. +The environment variables beginning with @env{OMP_} are defined by +section 4 of the OpenMP specification in version 4.0, while those +beginning with @env{GOMP_} are GNU extensions. @menu * OMP_DYNAMIC:: Dynamic adjustment of threads @@ -1099,9 +1096,11 @@ extensions. * OMP_SCHEDULE:: How threads are scheduled * OMP_THREAD_LIMIT:: Set the maximum number of threads * OMP_WAIT_POLICY:: How waiting threads are handled +* OMP_DISPLAY_ENV:: Show OpenMP version and environment variables * OMP_PROC_BIND:: Whether theads may be moved between CPUs * GOMP_CPU_AFFINITY:: Bind threads to specific CPUs * GOMP_STACKSIZE:: Set default thread stack size +* GOMP_SPINCOUNT:: Set the busy-wait spin count @end menu @@ -1119,7 +1118,7 @@ disabled by default. @ref{omp_set_dynamic} @item @emph{Reference}: -@uref{http://www.openmp.org/, OpenMP specifications v3.1}, section 4.3 +@uref{http://www.openmp.org/, OpenMP specifications v4.0}, section 4.3 @end table @@ -1137,7 +1136,7 @@ If undefined, the number of active levels is unlimited. @ref{omp_set_max_active_levels} @item @emph{Reference}: -@uref{http://www.openmp.org/, OpenMP specifications v3.1}, section 4.8 +@uref{http://www.openmp.org/, OpenMP specifications v4.0}, section 4.9 @end table @@ -1157,7 +1156,7 @@ regions are disabled by default. @ref{omp_set_nested} @item @emph{Reference}: -@uref{http://www.openmp.org/, OpenMP specifications v3.1}, section 4.5 +@uref{http://www.openmp.org/, OpenMP specifications v4.0}, section 4.6 @end table @@ -1177,7 +1176,7 @@ level. If undefined one thread per CPU is used. 
@ref{omp_set_num_threads} @item @emph{Reference}: -@uref{http://www.openmp.org/, OpenMP specifications v3.1}, section 4.2 +@uref{http://www.openmp.org/, OpenMP specifications v4.0}, section 4.2 @end table @@ -1198,7 +1197,7 @@ dynamic scheduling and a chunk size of 1 is used. @ref{omp_set_schedule} @item @emph{Reference}: -@uref{http://www.openmp.org/, OpenMP specifications v3.1}, sections 2.5.1 and 4.1 +@uref{http://www.openmp.org/, OpenMP specifications v4.0}, sections 2.7.1 and 4.1 @end table @@ -1218,7 +1217,7 @@ stack size is left unchanged. If undefined, the stack size is system dependent. @item @emph{Reference}: -@uref{http://www.openmp.org/, OpenMP specifications v3.1}, sections 4.6 +@uref{http://www.openmp.org/, OpenMP specifications v4.0}, section 4.7 @end table @@ -1237,7 +1236,7 @@ the number of threads is not limited. @ref{omp_get_thread_limit} @item @emph{Reference}: -@uref{http://www.openmp.org/, OpenMP specifications v3.1}, section 4.9 +@uref{http://www.openmp.org/, OpenMP specifications v4.0}, section 4.10 @end table @@ -1250,10 +1249,14 @@ the number of threads is not limited. Specifies whether waiting threads should be active or passive. If the value is @code{PASSIVE}, waiting threads should not consume CPU power while waiting; while the value is @code{ACTIVE} specifies that -they should. +they should. If undefined, threads wait actively for a short time +before waiting passively. + +@item @emph{See also}: +@ref{GOMP_SPINCOUNT} @item @emph{Reference}: -@uref{http://www.openmp.org/, OpenMP specifications v3.1}, sections 4.7 +@uref{http://www.openmp.org/, OpenMP specifications v4.0}, section 4.8 @end table @@ -1264,14 +1267,32 @@ they should. @table @asis @item @emph{Description}: Specifies whether threads may be moved between processors. If set to -@code{true}, OpenMP theads should not be moved, if set to @code{false} -they may be moved. +@code{TRUE}, OpenMP threads should not be moved; if set to @code{FALSE} +they may be moved. 
If undefined, threads may move between processors. @item @emph{See also}: @ref{GOMP_CPU_AFFINITY} @item @emph{Reference}: -@uref{http://www.openmp.org/, OpenMP specifications v3.1}, sections 4.4 +@uref{http://www.openmp.org/, OpenMP specifications v4.0}, section 4.4 +@end table + + + +@node OMP_DISPLAY_ENV +@section @env{OMP_DISPLAY_ENV} -- Show OpenMP version and environment variables +@cindex Environment Variable +@table @asis +@item @emph{Description}: +If set to @code{TRUE}, the OpenMP version number and the values +associated with the OpenMP environment variables are printed to @code{stderr}. +If set to @code{VERBOSE}, it additionally shows the values of the environment +variables which are GNU extensions. If undefined or set to @code{FALSE}, +this information will not be shown. + + +@item @emph{Reference}: +@uref{http://www.openmp.org/, OpenMP specifications v4.0}, section 4.12 @end table @@ -1298,7 +1319,7 @@ Fortran, may be used to query the setting of the @code{GOMP_CPU_AFFINITY} environment variable. A defined CPU affinity on startup cannot be changed or disabled during the runtime of the application. -If this environment variable is omitted, the host system will handle the +If this environment variable is omitted, the host system will handle the assignment of threads to CPUs. @item @emph{See also}: @@ -1331,6 +1352,33 @@ GCC Patches Mailinglist} +@node GOMP_SPINCOUNT +@section @env{GOMP_SPINCOUNT} -- Set the busy-wait spin count +@cindex Environment Variable +@cindex Implementation specific setting +@table @asis +@item @emph{Description}: +Determines how long a thread waits actively, consuming CPU power, +before waiting passively without consuming CPU power. The value may be +either @code{INFINITE} or @code{INFINITY} to always wait actively, or an +integer which gives the number of spins of the busy-wait loop. 
The +integer may optionally be followed by the following suffixes acting +as multiplication factors: @code{k} (kilo, thousand), @code{M} (mega, +million), @code{G} (giga, billion), or @code{T} (tera, trillion). +If undefined, 0 is used when @env{OMP_WAIT_POLICY} is @code{PASSIVE}, +300,000 is used when @env{OMP_WAIT_POLICY} is undefined and +30 billion is used when @env{OMP_WAIT_POLICY} is @code{ACTIVE}. +If there are more OpenMP threads than available CPUs, 1000 and 100 +spins are used for @env{OMP_WAIT_POLICY} being @code{ACTIVE} or +undefined, respectively; unless the @env{GOMP_SPINCOUNT} is lower +or @env{OMP_WAIT_POLICY} is @code{PASSIVE}. + +@item @emph{See also}: +@ref{OMP_WAIT_POLICY} +@end table + + + @c --------------------------------------------------------------------- @c The libgomp ABI @c --------------------------------------------------------------------- diff --git a/libgomp/libgomp_g.h b/libgomp/libgomp_g.h index a31e345..32c4cf6 100644 --- a/libgomp/libgomp_g.h +++ b/libgomp/libgomp_g.h @@ -33,6 +33,7 @@ /* barrier.c */ extern void GOMP_barrier (void); +extern bool GOMP_barrier_cancel (void); /* critical.c */ @@ -76,9 +77,22 @@ extern void GOMP_parallel_loop_guided_start (void (*)(void *), void *, unsigned, long, long, long, long); extern void GOMP_parallel_loop_runtime_start (void (*)(void *), void *, unsigned, long, long, long); +extern void GOMP_parallel_loop_static (void (*)(void *), void *, + unsigned, long, long, long, long, + unsigned); +extern void GOMP_parallel_loop_dynamic (void (*)(void *), void *, + unsigned, long, long, long, long, + unsigned); +extern void GOMP_parallel_loop_guided (void (*)(void *), void *, + unsigned, long, long, long, long, + unsigned); +extern void GOMP_parallel_loop_runtime (void (*)(void *), void *, + unsigned, long, long, long, + unsigned); extern void GOMP_loop_end (void); extern void GOMP_loop_end_nowait (void); +extern bool GOMP_loop_end_cancel (void); /* loop_ull.c */ @@ -157,13 +171,18 @@ extern void 
GOMP_ordered_end (void); extern void GOMP_parallel_start (void (*) (void *), void *, unsigned); extern void GOMP_parallel_end (void); +extern void GOMP_parallel (void (*) (void *), void *, unsigned, unsigned); +extern bool GOMP_cancel (int, bool); +extern bool GOMP_cancellation_point (int); /* task.c */ extern void GOMP_task (void (*) (void *), void *, void (*) (void *, void *), - long, long, bool, unsigned); + long, long, bool, unsigned, void **); extern void GOMP_taskwait (void); extern void GOMP_taskyield (void); +extern void GOMP_taskgroup_start (void); +extern void GOMP_taskgroup_end (void); /* sections.c */ @@ -171,8 +190,11 @@ extern unsigned GOMP_sections_start (unsigned); extern unsigned GOMP_sections_next (void); extern void GOMP_parallel_sections_start (void (*) (void *), void *, unsigned, unsigned); +extern void GOMP_parallel_sections (void (*) (void *), void *, + unsigned, unsigned, unsigned); extern void GOMP_sections_end (void); extern void GOMP_sections_end_nowait (void); +extern bool GOMP_sections_end_cancel (void); /* single.c */ @@ -180,4 +202,15 @@ extern bool GOMP_single_start (void); extern void *GOMP_single_copy_start (void); extern void GOMP_single_copy_end (void *); +/* target.c */ + +extern void GOMP_target (int, void (*) (void *), const void *, + size_t, void **, size_t *, unsigned char *); +extern void GOMP_target_data (int, const void *, + size_t, void **, size_t *, unsigned char *); +extern void GOMP_target_end_data (void); +extern void GOMP_target_update (int, const void *, + size_t, void **, size_t *, unsigned char *); +extern void GOMP_teams (unsigned int, unsigned int); + #endif /* LIBGOMP_G_H */ diff --git a/libgomp/loop.c b/libgomp/loop.c index 12c818b..f5b33a0 100644 --- a/libgomp/loop.c +++ b/libgomp/loop.c @@ -439,14 +439,14 @@ static void gomp_parallel_loop_start (void (*fn) (void *), void *data, unsigned num_threads, long start, long end, long incr, enum gomp_schedule_type sched, - long chunk_size) + long chunk_size, 
unsigned int flags) { struct gomp_team *team; num_threads = gomp_resolve_num_threads (num_threads, 0); team = gomp_new_team (num_threads); gomp_loop_init (&team->work_shares[0], start, end, incr, sched, chunk_size); - gomp_team_start (fn, data, num_threads, team); + gomp_team_start (fn, data, num_threads, flags, team); } void @@ -455,7 +455,7 @@ GOMP_parallel_loop_static_start (void (*fn) (void *), void *data, long incr, long chunk_size) { gomp_parallel_loop_start (fn, data, num_threads, start, end, incr, - GFS_STATIC, chunk_size); + GFS_STATIC, chunk_size, 0); } void @@ -464,7 +464,7 @@ GOMP_parallel_loop_dynamic_start (void (*fn) (void *), void *data, long incr, long chunk_size) { gomp_parallel_loop_start (fn, data, num_threads, start, end, incr, - GFS_DYNAMIC, chunk_size); + GFS_DYNAMIC, chunk_size, 0); } void @@ -473,7 +473,7 @@ GOMP_parallel_loop_guided_start (void (*fn) (void *), void *data, long incr, long chunk_size) { gomp_parallel_loop_start (fn, data, num_threads, start, end, incr, - GFS_GUIDED, chunk_size); + GFS_GUIDED, chunk_size, 0); } void @@ -483,11 +483,59 @@ GOMP_parallel_loop_runtime_start (void (*fn) (void *), void *data, { struct gomp_task_icv *icv = gomp_icv (false); gomp_parallel_loop_start (fn, data, num_threads, start, end, incr, - icv->run_sched_var, icv->run_sched_modifier); + icv->run_sched_var, icv->run_sched_modifier, 0); +} + +ialias_redirect (GOMP_parallel_end) + +void +GOMP_parallel_loop_static (void (*fn) (void *), void *data, + unsigned num_threads, long start, long end, + long incr, long chunk_size, unsigned flags) +{ + gomp_parallel_loop_start (fn, data, num_threads, start, end, incr, + GFS_STATIC, chunk_size, flags); + fn (data); + GOMP_parallel_end (); +} + +void +GOMP_parallel_loop_dynamic (void (*fn) (void *), void *data, + unsigned num_threads, long start, long end, + long incr, long chunk_size, unsigned flags) +{ + gomp_parallel_loop_start (fn, data, num_threads, start, end, incr, + GFS_DYNAMIC, chunk_size, flags); + fn 
(data); + GOMP_parallel_end (); +} + +void +GOMP_parallel_loop_guided (void (*fn) (void *), void *data, + unsigned num_threads, long start, long end, + long incr, long chunk_size, unsigned flags) +{ + gomp_parallel_loop_start (fn, data, num_threads, start, end, incr, + GFS_GUIDED, chunk_size, flags); + fn (data); + GOMP_parallel_end (); +} + +void +GOMP_parallel_loop_runtime (void (*fn) (void *), void *data, + unsigned num_threads, long start, long end, + long incr, unsigned flags) +{ + struct gomp_task_icv *icv = gomp_icv (false); + gomp_parallel_loop_start (fn, data, num_threads, start, end, incr, + icv->run_sched_var, icv->run_sched_modifier, + flags); + fn (data); + GOMP_parallel_end (); } /* The GOMP_loop_end* routines are called after the thread is told that - all loop iterations are complete. This first version synchronizes + all loop iterations are complete. The first two versions synchronize all threads; the nowait version does not. */ void @@ -496,6 +544,12 @@ GOMP_loop_end (void) gomp_work_share_end (); } +bool +GOMP_loop_end_cancel (void) +{ + return gomp_work_share_end_cancel (); +} + void GOMP_loop_end_nowait (void) { diff --git a/libgomp/omp.h.in b/libgomp/omp.h.in index 5db4407..4fc1236 100644 --- a/libgomp/omp.h.in +++ b/libgomp/omp.h.in @@ -52,6 +52,15 @@ typedef enum omp_sched_t omp_sched_auto = 4 } omp_sched_t; +typedef enum omp_proc_bind_t +{ + omp_proc_bind_false = 0, + omp_proc_bind_true = 1, + omp_proc_bind_master = 2, + omp_proc_bind_close = 3, + omp_proc_bind_spread = 4 +} omp_proc_bind_t; + #ifdef __cplusplus extern "C" { # define __GOMP_NOTHROW throw () @@ -88,17 +97,28 @@ extern int omp_test_nest_lock (omp_nest_lock_t *) __GOMP_NOTHROW; extern double omp_get_wtime (void) __GOMP_NOTHROW; extern double omp_get_wtick (void) __GOMP_NOTHROW; -void omp_set_schedule (omp_sched_t, int) __GOMP_NOTHROW; -void omp_get_schedule (omp_sched_t *, int *) __GOMP_NOTHROW; -int omp_get_thread_limit (void) __GOMP_NOTHROW; -void omp_set_max_active_levels 
(int) __GOMP_NOTHROW; -int omp_get_max_active_levels (void) __GOMP_NOTHROW; -int omp_get_level (void) __GOMP_NOTHROW; -int omp_get_ancestor_thread_num (int) __GOMP_NOTHROW; -int omp_get_team_size (int) __GOMP_NOTHROW; -int omp_get_active_level (void) __GOMP_NOTHROW; - -int omp_in_final (void) __GOMP_NOTHROW; +extern void omp_set_schedule (omp_sched_t, int) __GOMP_NOTHROW; +extern void omp_get_schedule (omp_sched_t *, int *) __GOMP_NOTHROW; +extern int omp_get_thread_limit (void) __GOMP_NOTHROW; +extern void omp_set_max_active_levels (int) __GOMP_NOTHROW; +extern int omp_get_max_active_levels (void) __GOMP_NOTHROW; +extern int omp_get_level (void) __GOMP_NOTHROW; +extern int omp_get_ancestor_thread_num (int) __GOMP_NOTHROW; +extern int omp_get_team_size (int) __GOMP_NOTHROW; +extern int omp_get_active_level (void) __GOMP_NOTHROW; + +extern int omp_in_final (void) __GOMP_NOTHROW; + +extern int omp_get_cancellation (void) __GOMP_NOTHROW; +extern omp_proc_bind_t omp_get_proc_bind (void) __GOMP_NOTHROW; + +extern void omp_set_default_device (int) __GOMP_NOTHROW; +extern int omp_get_default_device (void) __GOMP_NOTHROW; +extern int omp_get_num_devices (void) __GOMP_NOTHROW; +extern int omp_get_num_teams (void) __GOMP_NOTHROW; +extern int omp_get_team_num (void) __GOMP_NOTHROW; + +extern int omp_is_initial_device (void) __GOMP_NOTHROW; #ifdef __cplusplus } diff --git a/libgomp/omp_lib.f90.in b/libgomp/omp_lib.f90.in index c9bc5fd..3c6deb6 100644 --- a/libgomp/omp_lib.f90.in +++ b/libgomp/omp_lib.f90.in @@ -27,16 +27,22 @@ integer, parameter :: omp_lock_kind = @OMP_LOCK_KIND@ integer, parameter :: omp_nest_lock_kind = @OMP_NEST_LOCK_KIND@ integer, parameter :: omp_sched_kind = 4 + integer, parameter :: omp_proc_bind_kind = 4 + integer (omp_sched_kind), parameter :: omp_sched_static = 1 + integer (omp_sched_kind), parameter :: omp_sched_dynamic = 2 + integer (omp_sched_kind), parameter :: omp_sched_guided = 3 + integer (omp_sched_kind), parameter :: omp_sched_auto = 4 + 
integer (omp_proc_bind_kind), parameter :: omp_proc_bind_false = 0 + integer (omp_proc_bind_kind), parameter :: omp_proc_bind_true = 1 + integer (omp_proc_bind_kind), parameter :: omp_proc_bind_master = 2 + integer (omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3 + integer (omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4 end module module omp_lib use omp_lib_kinds implicit none integer, parameter :: openmp_version = 201107 - integer (omp_sched_kind), parameter :: omp_sched_static = 1 - integer (omp_sched_kind), parameter :: omp_sched_dynamic = 2 - integer (omp_sched_kind), parameter :: omp_sched_guided = 3 - integer (omp_sched_kind), parameter :: omp_sched_auto = 4 interface subroutine omp_init_lock (svar) @@ -123,21 +129,18 @@ interface function omp_get_dynamic () - use omp_lib_kinds logical (4) :: omp_get_dynamic end function omp_get_dynamic end interface interface function omp_get_nested () - use omp_lib_kinds logical (4) :: omp_get_nested end function omp_get_nested end interface interface function omp_in_parallel () - use omp_lib_kinds logical (4) :: omp_in_parallel end function omp_in_parallel end interface @@ -152,28 +155,24 @@ interface function omp_get_max_threads () - use omp_lib_kinds integer (4) :: omp_get_max_threads end function omp_get_max_threads end interface interface function omp_get_num_procs () - use omp_lib_kinds integer (4) :: omp_get_num_procs end function omp_get_num_procs end interface interface function omp_get_num_threads () - use omp_lib_kinds integer (4) :: omp_get_num_threads end function omp_get_num_threads end interface interface function omp_get_thread_num () - use omp_lib_kinds integer (4) :: omp_get_thread_num end function omp_get_thread_num end interface @@ -226,44 +225,37 @@ interface function omp_get_thread_limit () - use omp_lib_kinds integer (4) :: omp_get_thread_limit end function omp_get_thread_limit end interface interface omp_set_max_active_levels subroutine omp_set_max_active_levels (max_levels) - 
use omp_lib_kinds integer (4), intent (in) :: max_levels end subroutine omp_set_max_active_levels subroutine omp_set_max_active_levels_8 (max_levels) - use omp_lib_kinds integer (8), intent (in) :: max_levels end subroutine omp_set_max_active_levels_8 end interface interface function omp_get_max_active_levels () - use omp_lib_kinds integer (4) :: omp_get_max_active_levels end function omp_get_max_active_levels end interface interface function omp_get_level () - use omp_lib_kinds integer (4) :: omp_get_level end function omp_get_level end interface interface omp_get_ancestor_thread_num function omp_get_ancestor_thread_num (level) - use omp_lib_kinds integer (4), intent (in) :: level integer (4) :: omp_get_ancestor_thread_num end function omp_get_ancestor_thread_num function omp_get_ancestor_thread_num_8 (level) - use omp_lib_kinds integer (8), intent (in) :: level integer (4) :: omp_get_ancestor_thread_num_8 end function omp_get_ancestor_thread_num_8 @@ -271,12 +263,10 @@ interface omp_get_team_size function omp_get_team_size (level) - use omp_lib_kinds integer (4), intent (in) :: level integer (4) :: omp_get_team_size end function omp_get_team_size function omp_get_team_size_8 (level) - use omp_lib_kinds integer (8), intent (in) :: level integer (4) :: omp_get_team_size_8 end function omp_get_team_size_8 @@ -284,16 +274,66 @@ interface function omp_get_active_level () - use omp_lib_kinds integer (4) :: omp_get_active_level end function omp_get_active_level end interface interface function omp_in_final () - use omp_lib_kinds logical (4) :: omp_in_final end function omp_in_final end interface + interface + function omp_get_cancellation () + logical (4) :: omp_get_cancellation + end function omp_get_cancellation + end interface + + interface + function omp_get_proc_bind () + use omp_lib_kinds + integer (omp_proc_bind_kind) :: omp_get_proc_bind + end function omp_get_proc_bind + end interface + + interface omp_set_default_device + subroutine omp_set_default_device 
(device_num) + integer (4), intent (in) :: device_num + end subroutine omp_set_default_device + subroutine omp_set_default_device_8 (device_num) + integer (8), intent (in) :: device_num + end subroutine omp_set_default_device_8 + end interface + + interface + function omp_get_default_device () + integer (4) :: omp_get_default_device + end function omp_get_default_device + end interface + + interface + function omp_get_num_devices () + integer (4) :: omp_get_num_devices + end function omp_get_num_devices + end interface + + interface + function omp_get_num_teams () + integer (4) :: omp_get_num_teams + end function omp_get_num_teams + end interface + + interface + function omp_get_team_num () + integer (4) :: omp_get_team_num + end function omp_get_team_num + end interface + + interface + function omp_is_initial_device () + logical (4) :: omp_is_initial_device + end function omp_is_initial_device + end interface + end module omp_lib diff --git a/libgomp/omp_lib.h.in b/libgomp/omp_lib.h.in index f188edc..804636d 100644 --- a/libgomp/omp_lib.h.in +++ b/libgomp/omp_lib.h.in @@ -33,6 +33,18 @@ parameter (omp_sched_dynamic = 2) parameter (omp_sched_guided = 3) parameter (omp_sched_auto = 4) + integer omp_proc_bind_kind + parameter (omp_proc_bind_kind = 4) + integer (omp_proc_bind_kind) omp_proc_bind_false + integer (omp_proc_bind_kind) omp_proc_bind_true + integer (omp_proc_bind_kind) omp_proc_bind_master + integer (omp_proc_bind_kind) omp_proc_bind_close + integer (omp_proc_bind_kind) omp_proc_bind_spread + parameter (omp_proc_bind_false = 0) + parameter (omp_proc_bind_true = 1) + parameter (omp_proc_bind_master = 2) + parameter (omp_proc_bind_close = 3) + parameter (omp_proc_bind_spread = 4) parameter (openmp_version = 201107) external omp_init_lock, omp_init_nest_lock @@ -68,3 +80,18 @@ external omp_in_final logical(4) omp_in_final + + external omp_get_cancelllation + logical(4) omp_get_cancelllation + + external omp_get_proc_bind + integer(omp_proc_bind_kind) 
omp_get_proc_bind + + external omp_set_default_device, omp_get_default_device + external omp_get_num_devices, omp_get_num_teams + external omp_get_team_num + integer(4) omp_get_default_device, omp_get_num_devices + integer(4) omp_get_num_teams, omp_get_team_num + + external omp_is_initial_device + logical(4) omp_is_initial_device diff --git a/libgomp/parallel.c b/libgomp/parallel.c index 4573511..ccc0b6a 100644 --- a/libgomp/parallel.c +++ b/libgomp/parallel.c @@ -37,18 +37,19 @@ unsigned gomp_resolve_num_threads (unsigned specified, unsigned count) { - struct gomp_thread *thread = gomp_thread(); + struct gomp_thread *thr = gomp_thread (); struct gomp_task_icv *icv; unsigned threads_requested, max_num_threads, num_threads; - unsigned long remaining; + unsigned long busy; + struct gomp_thread_pool *pool; icv = gomp_icv (false); if (specified == 1) return 1; - else if (thread->ts.active_level >= 1 && !icv->nest_var) + else if (thr->ts.active_level >= 1 && !icv->nest_var) return 1; - else if (thread->ts.active_level >= gomp_max_active_levels_var) + else if (thr->ts.active_level >= gomp_max_active_levels_var) return 1; /* If NUM_THREADS not specified, use nthreads_var. */ @@ -72,30 +73,46 @@ gomp_resolve_num_threads (unsigned specified, unsigned count) max_num_threads = count; } - /* ULONG_MAX stands for infinity. */ - if (__builtin_expect (gomp_thread_limit_var == ULONG_MAX, 1) + /* UINT_MAX stands for infinity. */ + if (__builtin_expect (icv->thread_limit_var == UINT_MAX, 1) || max_num_threads == 1) return max_num_threads; + /* The threads_busy counter lives in thread_pool, if there + isn't a thread_pool yet, there must be just one thread + in the contention group. If thr->team is NULL, this isn't + nested parallel, so there is just one thread in the + contention group as well, no need to handle it atomically. 
*/ + pool = thr->thread_pool; + if (thr->ts.team == NULL) + { + num_threads = max_num_threads; + if (num_threads > icv->thread_limit_var) + num_threads = icv->thread_limit_var; + if (pool) + pool->threads_busy = num_threads; + return num_threads; + } + #ifdef HAVE_SYNC_BUILTINS do { - remaining = gomp_remaining_threads_count; + busy = pool->threads_busy; num_threads = max_num_threads; - if (num_threads > remaining) - num_threads = remaining + 1; + if (icv->thread_limit_var - busy + 1 < num_threads) + num_threads = icv->thread_limit_var - busy + 1; } - while (__sync_val_compare_and_swap (&gomp_remaining_threads_count, - remaining, remaining - num_threads + 1) - != remaining); + while (__sync_val_compare_and_swap (&pool->threads_busy, + busy, busy + num_threads - 1) + != busy); #else - gomp_mutex_lock (&gomp_remaining_threads_lock); + gomp_mutex_lock (&gomp_managed_threads_lock); num_threads = max_num_threads; - remaining = gomp_remaining_threads_count; - if (num_threads > remaining) - num_threads = remaining + 1; - gomp_remaining_threads_count -= num_threads - 1; - gomp_mutex_unlock (&gomp_remaining_threads_lock); + busy = pool->threads_busy; + if (icv->thread_limit_var - busy + 1 < num_threads) + num_threads = icv->thread_limit_var - busy + 1; + pool->threads_busy += num_threads - 1; + gomp_mutex_unlock (&gomp_managed_threads_lock); #endif return num_threads; @@ -105,31 +122,113 @@ void GOMP_parallel_start (void (*fn) (void *), void *data, unsigned num_threads) { num_threads = gomp_resolve_num_threads (num_threads, 0); - gomp_team_start (fn, data, num_threads, gomp_new_team (num_threads)); + gomp_team_start (fn, data, num_threads, 0, gomp_new_team (num_threads)); } void GOMP_parallel_end (void) { - if (__builtin_expect (gomp_thread_limit_var != ULONG_MAX, 0)) + struct gomp_task_icv *icv = gomp_icv (false); + if (__builtin_expect (icv->thread_limit_var != UINT_MAX, 0)) { struct gomp_thread *thr = gomp_thread (); struct gomp_team *team = thr->ts.team; - if (team && 
team->nthreads > 1) + unsigned int nthreads = team ? team->nthreads : 1; + gomp_team_end (); + if (nthreads > 1) { + /* If not nested, there is just one thread in the + contention group left, no need for atomicity. */ + if (thr->ts.team == NULL) + thr->thread_pool->threads_busy = 1; + else + { #ifdef HAVE_SYNC_BUILTINS - __sync_fetch_and_add (&gomp_remaining_threads_count, - 1UL - team->nthreads); + __sync_fetch_and_add (&thr->thread_pool->threads_busy, + 1UL - nthreads); #else - gomp_mutex_lock (&gomp_remaining_threads_lock); - gomp_remaining_threads_count -= team->nthreads - 1; - gomp_mutex_unlock (&gomp_remaining_threads_lock); + gomp_mutex_lock (&gomp_managed_threads_lock); + thr->thread_pool->threads_busy -= nthreads - 1; + gomp_mutex_unlock (&gomp_managed_threads_lock); #endif + } } } - gomp_team_end (); + else + gomp_team_end (); +} +ialias (GOMP_parallel_end) + +void +GOMP_parallel (void (*fn) (void *), void *data, unsigned num_threads, unsigned int flags) +{ + num_threads = gomp_resolve_num_threads (num_threads, 0); + gomp_team_start (fn, data, num_threads, flags, gomp_new_team (num_threads)); + fn (data); + ialias_call (GOMP_parallel_end) (); +} + +bool +GOMP_cancellation_point (int which) +{ + if (!gomp_cancel_var) + return false; + + struct gomp_thread *thr = gomp_thread (); + struct gomp_team *team = thr->ts.team; + if (which & (GOMP_CANCEL_LOOP | GOMP_CANCEL_SECTIONS)) + { + if (team == NULL) + return false; + return team->work_share_cancelled != 0; + } + else if (which & GOMP_CANCEL_TASKGROUP) + { + if (thr->task->taskgroup && thr->task->taskgroup->cancelled) + return true; + /* FALLTHRU into the GOMP_CANCEL_PARALLEL case, + as #pragma omp cancel parallel also cancels all explicit + tasks. 
*/ + } + if (team) + return gomp_team_barrier_cancelled (&team->barrier); + return false; } +ialias (GOMP_cancellation_point) + +bool +GOMP_cancel (int which, bool do_cancel) +{ + if (!gomp_cancel_var) + return false; + + if (!do_cancel) + return ialias_call (GOMP_cancellation_point) (which); + struct gomp_thread *thr = gomp_thread (); + struct gomp_team *team = thr->ts.team; + if (which & (GOMP_CANCEL_LOOP | GOMP_CANCEL_SECTIONS)) + { + /* In orphaned worksharing region, all we want to cancel + is current thread. */ + if (team != NULL) + team->work_share_cancelled = 1; + return true; + } + else if (which & GOMP_CANCEL_TASKGROUP) + { + if (thr->task->taskgroup && !thr->task->taskgroup->cancelled) + { + gomp_mutex_lock (&team->task_lock); + thr->task->taskgroup->cancelled = true; + gomp_mutex_unlock (&team->task_lock); + } + return true; + } + team->team_cancelled = 1; + gomp_team_barrier_cancel (team); + return true; +} /* The public OpenMP API for thread and team related inquiries. */ diff --git a/libgomp/sections.c b/libgomp/sections.c index 369f7a4..6fb117d 100644 --- a/libgomp/sections.c +++ b/libgomp/sections.c @@ -139,11 +139,27 @@ GOMP_parallel_sections_start (void (*fn) (void *), void *data, num_threads = gomp_resolve_num_threads (num_threads, count); team = gomp_new_team (num_threads); gomp_sections_init (&team->work_shares[0], count); - gomp_team_start (fn, data, num_threads, team); + gomp_team_start (fn, data, num_threads, 0, team); +} + +ialias_redirect (GOMP_parallel_end) + +void +GOMP_parallel_sections (void (*fn) (void *), void *data, + unsigned num_threads, unsigned count, unsigned flags) +{ + struct gomp_team *team; + + num_threads = gomp_resolve_num_threads (num_threads, count); + team = gomp_new_team (num_threads); + gomp_sections_init (&team->work_shares[0], count); + gomp_team_start (fn, data, num_threads, flags, team); + fn (data); + GOMP_parallel_end (); } /* The GOMP_section_end* routines are called after the thread is told - that all 
sections are complete. This first version synchronizes + that all sections are complete. The first two versions synchronize all threads; the nowait version does not. */ void @@ -152,6 +168,12 @@ GOMP_sections_end (void) gomp_work_share_end (); } +bool +GOMP_sections_end_cancel (void) +{ + return gomp_work_share_end_cancel (); +} + void GOMP_sections_end_nowait (void) { diff --git a/libgomp/target.c b/libgomp/target.c new file mode 100644 index 0000000..2e9865f --- /dev/null +++ b/libgomp/target.c @@ -0,0 +1,96 @@ +/* Copyright (C) 2013 Free Software Foundation, Inc. + Contributed by Jakub Jelinek <jakub@redhat.com>. + + This file is part of the GNU OpenMP Library (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This file handles the maintenance of threads in response to team + creation and termination. */ + +#include "libgomp.h" +#include <limits.h> +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> + +attribute_hidden int +gomp_get_num_devices (void) +{ + return 0; +} + +/* Called when encountering a target directive. If DEVICE + is -1, it means use device-var ICV. 
If it is -2 (or any other value + larger than last available hw device), use host fallback. + FN is address of host code, OPENMP_TARGET contains value of the + __OPENMP_TARGET__ symbol in the shared library or binary that invokes + GOMP_target. HOSTADDRS, SIZES and KINDS are arrays + with MAPNUM entries, with addresses of the host objects, + sizes of the host objects (resp. for pointer kind pointer bias + and assumed sizeof (void *) size) and kinds. */ + +void +GOMP_target (int device, void (*fn) (void *), const void *openmp_target, + size_t mapnum, void **hostaddrs, size_t *sizes, + unsigned char *kinds) +{ + /* Host fallback. */ + struct gomp_thread old_thr, *thr = gomp_thread (); + old_thr = *thr; + memset (thr, '\0', sizeof (*thr)); + if (gomp_places_list) + { + thr->place = old_thr.place; + thr->ts.place_partition_len = gomp_places_list_len; + } + fn (hostaddrs); + gomp_free_thread (thr); + *thr = old_thr; +} + +void +GOMP_target_data (int device, const void *openmp_target, size_t mapnum, + void **hostaddrs, size_t *sizes, unsigned char *kinds) +{ +} + +void +GOMP_target_end_data (void) +{ +} + +void +GOMP_target_update (int device, const void *openmp_target, size_t mapnum, + void **hostaddrs, size_t *sizes, unsigned char *kinds) +{ +} + +void +GOMP_teams (unsigned int num_teams, unsigned int thread_limit) +{ + if (thread_limit) + { + struct gomp_task_icv *icv = gomp_icv (true); + icv->thread_limit_var + = thread_limit > INT_MAX ? 
UINT_MAX : thread_limit; + } + (void) num_teams; +} diff --git a/libgomp/task.c b/libgomp/task.c index 7de650a..a3d343f 100644 --- a/libgomp/task.c +++ b/libgomp/task.c @@ -29,6 +29,33 @@ #include <stdlib.h> #include <string.h> +typedef struct gomp_task_depend_entry *hash_entry_type; + +static inline void * +htab_alloc (size_t size) +{ + return gomp_malloc (size); +} + +static inline void +htab_free (void *ptr) +{ + free (ptr); +} + +#include "hashtab.h" + +static inline hashval_t +htab_hash (hash_entry_type element) +{ + return hash_pointer (element->addr); +} + +static inline bool +htab_eq (hash_entry_type x, hash_entry_type y) +{ + return x->addr == y->addr; +} /* Create a new task data structure. */ @@ -42,7 +69,12 @@ gomp_init_task (struct gomp_task *task, struct gomp_task *parent_task, task->in_taskwait = false; task->in_tied_task = false; task->final_task = false; + task->copy_ctors_done = false; task->children = NULL; + task->taskgroup = NULL; + task->dependers = NULL; + task->depend_hash = NULL; + task->depend_count = 0; gomp_sem_init (&task->taskwait_sem, 0); } @@ -78,7 +110,8 @@ gomp_clear_parent (struct gomp_task *children) void GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), - long arg_size, long arg_align, bool if_clause, unsigned flags) + long arg_size, long arg_align, bool if_clause, unsigned flags, + void **depend) { struct gomp_thread *thr = gomp_thread (); struct gomp_team *team = thr->ts.team; @@ -94,17 +127,58 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), flags &= ~1; #endif + /* If parallel or taskgroup has been cancelled, don't start new tasks. 
*/ + if (team + && (gomp_team_barrier_cancelled (&team->barrier) + || (thr->task->taskgroup && thr->task->taskgroup->cancelled))) + return; + if (!if_clause || team == NULL || (thr->task && thr->task->final_task) || team->task_count > 64 * team->nthreads) { struct gomp_task task; + /* If there are depend clauses and earlier deferred sibling tasks + with depend clauses, check if there isn't a dependency. If there + is, fall through to the deferred task handling, as we can't + schedule such tasks right away. There is no need to handle + depend clauses for non-deferred tasks other than this, because + the parent task is suspended until the child task finishes and thus + it can't start further child tasks. */ + if ((flags & 8) && thr->task && thr->task->depend_hash) + { + struct gomp_task *parent = thr->task; + struct gomp_task_depend_entry elem, *ent = NULL; + size_t ndepend = (uintptr_t) depend[0]; + size_t nout = (uintptr_t) depend[1]; + size_t i; + gomp_mutex_lock (&team->task_lock); + for (i = 0; i < ndepend; i++) + { + elem.addr = depend[i + 2]; + ent = htab_find (parent->depend_hash, &elem); + for (; ent; ent = ent->next) + if (i >= nout && ent->is_in) + continue; + else + break; + if (ent) + break; + } + gomp_mutex_unlock (&team->task_lock); + if (ent) + goto defer; + } + gomp_init_task (&task, thr->task, gomp_icv (false)); task.kind = GOMP_TASK_IFFALSE; task.final_task = (thr->task && thr->task->final_task) || (flags & 2); if (thr->task) - task.in_tied_task = thr->task->in_tied_task; + { + task.in_tied_task = thr->task->in_tied_task; + task.taskgroup = thr->task->taskgroup; + } thr->task = &task; if (__builtin_expect (cpyfn != NULL, 0)) { @@ -135,29 +209,161 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), } else { + defer:; struct gomp_task *task; struct gomp_task *parent = thr->task; + struct gomp_taskgroup *taskgroup = parent->taskgroup; char *arg; bool do_wake; - - task = gomp_malloc (sizeof (*task) + arg_size + arg_align - 
1); - arg = (char *) (((uintptr_t) (task + 1) + arg_align - 1) + size_t depend_size = 0; + + if (flags & 8) + depend_size = ((uintptr_t) depend[0] + * sizeof (struct gomp_task_depend_entry)); + task = gomp_malloc (sizeof (*task) + depend_size + + arg_size + arg_align - 1); + arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1) & ~(uintptr_t) (arg_align - 1)); gomp_init_task (task, parent, gomp_icv (false)); task->kind = GOMP_TASK_IFFALSE; task->in_tied_task = parent->in_tied_task; + task->taskgroup = taskgroup; thr->task = task; if (cpyfn) - cpyfn (arg, data); + { + cpyfn (arg, data); + task->copy_ctors_done = true; + } else memcpy (arg, data, arg_size); thr->task = parent; task->kind = GOMP_TASK_WAITING; task->fn = fn; task->fn_data = arg; - task->in_tied_task = true; task->final_task = (flags & 2) >> 1; gomp_mutex_lock (&team->task_lock); + /* If parallel or taskgroup has been cancelled, don't start new + tasks. */ + if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier) + || (taskgroup && taskgroup->cancelled)) + && !task->copy_ctors_done, 0)) + { + gomp_mutex_unlock (&team->task_lock); + gomp_finish_task (task); + free (task); + return; + } + if (taskgroup) + taskgroup->num_children++; + if (depend_size) + { + size_t ndepend = (uintptr_t) depend[0]; + size_t nout = (uintptr_t) depend[1]; + size_t i; + hash_entry_type ent; + + task->depend_count = ndepend; + task->num_dependees = 0; + if (parent->depend_hash == NULL) + parent->depend_hash + = htab_create (2 * ndepend > 12 ? 
2 * ndepend : 12); + for (i = 0; i < ndepend; i++) + { + task->depend[i].addr = depend[2 + i]; + task->depend[i].next = NULL; + task->depend[i].prev = NULL; + task->depend[i].task = task; + task->depend[i].is_in = i >= nout; + task->depend[i].redundant = false; + + hash_entry_type *slot + = htab_find_slot (&parent->depend_hash, &task->depend[i], + INSERT); + hash_entry_type out = NULL; + if (*slot) + { + /* If multiple depends on the same task are the + same, all but the first one are redundant. + As inout/out come first, if any of them is + inout/out, it will win, which is the right + semantics. */ + if ((*slot)->task == task) + { + task->depend[i].redundant = true; + continue; + } + for (ent = *slot; ent; ent = ent->next) + { + /* depend(in:...) doesn't depend on earlier + depend(in:...). */ + if (i >= nout && ent->is_in) + continue; + + if (!ent->is_in) + out = ent; + + struct gomp_task *tsk = ent->task; + if (tsk->dependers == NULL) + { + tsk->dependers + = gomp_malloc (sizeof (struct gomp_dependers_vec) + + 6 * sizeof (struct gomp_task *)); + tsk->dependers->n_elem = 1; + tsk->dependers->allocated = 6; + tsk->dependers->elem[0] = task; + task->num_dependees++; + continue; + } + /* We already have some other dependency on tsk + from earlier depend clause. 
*/ + else if (tsk->dependers->n_elem + && (tsk->dependers->elem[tsk->dependers->n_elem + - 1] + == task)) + continue; + else if (tsk->dependers->n_elem + == tsk->dependers->allocated) + { + tsk->dependers->allocated + = tsk->dependers->allocated * 2 + 2; + tsk->dependers + = gomp_realloc (tsk->dependers, + sizeof (struct gomp_dependers_vec) + + (tsk->dependers->allocated + * sizeof (struct gomp_task *))); + } + tsk->dependers->elem[tsk->dependers->n_elem++] = task; + task->num_dependees++; + } + task->depend[i].next = *slot; + (*slot)->prev = &task->depend[i]; + } + *slot = &task->depend[i]; + + /* There is no need to store more than one depend({,in}out:) + task per address in the hash table chain, because each out + depends on all earlier outs, thus it is enough to record + just the last depend({,in}out:). For depend(in:), we need + to keep all of the previous ones not terminated yet, because + a later depend({,in}out:) might need to depend on all of + them. So, if the new task's clause is depend({,in}out:), + we know there is at most one other depend({,in}out:) clause + in the list (out) and to maintain the invariant we now + need to remove it from the list. 
*/ + if (!task->depend[i].is_in && out) + { + if (out->next) + out->next->prev = out->prev; + out->prev->next = out->next; + out->redundant = true; + } + } + if (task->num_dependees) + { + gomp_mutex_unlock (&team->task_lock); + return; + } + } if (parent->children) { task->next_child = parent->children; @@ -171,6 +377,22 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), task->prev_child = task; } parent->children = task; + if (taskgroup) + { + if (taskgroup->children) + { + task->next_taskgroup = taskgroup->children; + task->prev_taskgroup = taskgroup->children->prev_taskgroup; + task->next_taskgroup->prev_taskgroup = task; + task->prev_taskgroup->next_taskgroup = task; + } + else + { + task->next_taskgroup = task; + task->prev_taskgroup = task; + } + taskgroup->children = task; + } if (team->task_queue) { task->next_queue = team->task_queue; @@ -185,6 +407,7 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), team->task_queue = task; } ++team->task_count; + ++team->task_queued_count; gomp_team_barrier_set_task_pending (&team->barrier); do_wake = team->task_running_count + !parent->in_tied_task < team->nthreads; @@ -194,6 +417,220 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), } } +static inline bool +gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent, + struct gomp_taskgroup *taskgroup, struct gomp_team *team) +{ + if (parent && parent->children == child_task) + parent->children = child_task->next_child; + if (taskgroup && taskgroup->children == child_task) + taskgroup->children = child_task->next_taskgroup; + child_task->prev_queue->next_queue = child_task->next_queue; + child_task->next_queue->prev_queue = child_task->prev_queue; + if (team->task_queue == child_task) + { + if (child_task->next_queue != child_task) + team->task_queue = child_task->next_queue; + else + team->task_queue = NULL; + } + child_task->kind = GOMP_TASK_TIED; + if 
(--team->task_queued_count == 0) + gomp_team_barrier_clear_task_pending (&team->barrier); + if ((gomp_team_barrier_cancelled (&team->barrier) + || (taskgroup && taskgroup->cancelled)) + && !child_task->copy_ctors_done) + return true; + return false; +} + +static void +gomp_task_run_post_handle_depend_hash (struct gomp_task *child_task) +{ + struct gomp_task *parent = child_task->parent; + size_t i; + + for (i = 0; i < child_task->depend_count; i++) + if (!child_task->depend[i].redundant) + { + if (child_task->depend[i].next) + child_task->depend[i].next->prev = child_task->depend[i].prev; + if (child_task->depend[i].prev) + child_task->depend[i].prev->next = child_task->depend[i].next; + else + { + hash_entry_type *slot + = htab_find_slot (&parent->depend_hash, &child_task->depend[i], + NO_INSERT); + if (*slot != &child_task->depend[i]) + abort (); + if (child_task->depend[i].next) + *slot = child_task->depend[i].next; + else + htab_clear_slot (parent->depend_hash, slot); + } + } +} + +static size_t +gomp_task_run_post_handle_dependers (struct gomp_task *child_task, + struct gomp_team *team) +{ + struct gomp_task *parent = child_task->parent; + size_t i, count = child_task->dependers->n_elem, ret = 0; + for (i = 0; i < count; i++) + { + struct gomp_task *task = child_task->dependers->elem[i]; + if (--task->num_dependees != 0) + continue; + + struct gomp_taskgroup *taskgroup = task->taskgroup; + if (parent) + { + if (parent->children) + { + task->next_child = parent->children; + task->prev_child = parent->children->prev_child; + task->next_child->prev_child = task; + task->prev_child->next_child = task; + } + else + { + task->next_child = task; + task->prev_child = task; + } + parent->children = task; + if (parent->in_taskwait) + { + parent->in_taskwait = false; + gomp_sem_post (&parent->taskwait_sem); + } + } + if (taskgroup) + { + if (taskgroup->children) + { + task->next_taskgroup = taskgroup->children; + task->prev_taskgroup = 
taskgroup->children->prev_taskgroup; + task->next_taskgroup->prev_taskgroup = task; + task->prev_taskgroup->next_taskgroup = task; + } + else + { + task->next_taskgroup = task; + task->prev_taskgroup = task; + } + taskgroup->children = task; + if (taskgroup->in_taskgroup_wait) + { + taskgroup->in_taskgroup_wait = false; + gomp_sem_post (&taskgroup->taskgroup_sem); + } + } + if (team->task_queue) + { + task->next_queue = team->task_queue; + task->prev_queue = team->task_queue->prev_queue; + task->next_queue->prev_queue = task; + task->prev_queue->next_queue = task; + } + else + { + task->next_queue = task; + task->prev_queue = task; + team->task_queue = task; + } + ++team->task_count; + ++team->task_queued_count; + ++ret; + } + free (child_task->dependers); + child_task->dependers = NULL; + if (ret > 1) + gomp_team_barrier_set_task_pending (&team->barrier); + return ret; +} + +static inline size_t +gomp_task_run_post_handle_depend (struct gomp_task *child_task, + struct gomp_team *team) +{ + if (child_task->depend_count == 0) + return 0; + + /* If parent is gone already, the hash table is freed and nothing + will use the hash table anymore, no need to remove anything from it. 
*/ + if (child_task->parent != NULL) + gomp_task_run_post_handle_depend_hash (child_task); + + if (child_task->dependers == NULL) + return 0; + + return gomp_task_run_post_handle_dependers (child_task, team); +} + +static inline void +gomp_task_run_post_remove_parent (struct gomp_task *child_task) +{ + struct gomp_task *parent = child_task->parent; + if (parent == NULL) + return; + child_task->prev_child->next_child = child_task->next_child; + child_task->next_child->prev_child = child_task->prev_child; + if (parent->children != child_task) + return; + if (child_task->next_child != child_task) + parent->children = child_task->next_child; + else + { + /* We access task->children in GOMP_taskwait + outside of the task lock mutex region, so + need a release barrier here to ensure memory + written by child_task->fn above is flushed + before the NULL is written. */ + __atomic_store_n (&parent->children, NULL, MEMMODEL_RELEASE); + if (parent->in_taskwait) + { + parent->in_taskwait = false; + gomp_sem_post (&parent->taskwait_sem); + } + } +} + +static inline void +gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task) +{ + struct gomp_taskgroup *taskgroup = child_task->taskgroup; + if (taskgroup == NULL) + return; + child_task->prev_taskgroup->next_taskgroup = child_task->next_taskgroup; + child_task->next_taskgroup->prev_taskgroup = child_task->prev_taskgroup; + if (taskgroup->num_children > 1) + --taskgroup->num_children; + else + { + /* We access taskgroup->num_children in GOMP_taskgroup_end + outside of the task lock mutex region, so + need a release barrier here to ensure memory + written by child_task->fn above is flushed + before the NULL is written. 
*/ + __atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE); + } + if (taskgroup->children != child_task) + return; + if (child_task->next_taskgroup != child_task) + taskgroup->children = child_task->next_taskgroup; + else + { + taskgroup->children = NULL; + if (taskgroup->in_taskgroup_wait) + { + taskgroup->in_taskgroup_wait = false; + gomp_sem_post (&taskgroup->taskgroup_sem); + } + } +} + void gomp_barrier_handle_tasks (gomp_barrier_state_t state) { @@ -202,6 +639,7 @@ gomp_barrier_handle_tasks (gomp_barrier_state_t state) struct gomp_task *task = thr->task; struct gomp_task *child_task = NULL; struct gomp_task *to_free = NULL; + int do_wake = 0; gomp_mutex_lock (&team->task_lock); if (gomp_barrier_last_thread (state)) @@ -218,26 +656,31 @@ gomp_barrier_handle_tasks (gomp_barrier_state_t state) while (1) { + bool cancelled = false; if (team->task_queue != NULL) { - struct gomp_task *parent; - child_task = team->task_queue; - parent = child_task->parent; - if (parent && parent->children == child_task) - parent->children = child_task->next_child; - child_task->prev_queue->next_queue = child_task->next_queue; - child_task->next_queue->prev_queue = child_task->prev_queue; - if (child_task->next_queue != child_task) - team->task_queue = child_task->next_queue; - else - team->task_queue = NULL; - child_task->kind = GOMP_TASK_TIED; + cancelled = gomp_task_run_pre (child_task, child_task->parent, + child_task->taskgroup, team); + if (__builtin_expect (cancelled, 0)) + { + if (to_free) + { + gomp_finish_task (to_free); + free (to_free); + to_free = NULL; + } + goto finish_cancelled; + } team->task_running_count++; - if (team->task_count == team->task_running_count) - gomp_team_barrier_clear_task_pending (&team->barrier); + child_task->in_tied_task = true; } gomp_mutex_unlock (&team->task_lock); + if (do_wake) + { + gomp_team_barrier_wake (&team->barrier, do_wake); + do_wake = 0; + } if (to_free) { gomp_finish_task (to_free); @@ -255,33 +698,22 @@ 
gomp_barrier_handle_tasks (gomp_barrier_state_t state) gomp_mutex_lock (&team->task_lock); if (child_task) { - struct gomp_task *parent = child_task->parent; - if (parent) - { - child_task->prev_child->next_child = child_task->next_child; - child_task->next_child->prev_child = child_task->prev_child; - if (parent->children == child_task) - { - if (child_task->next_child != child_task) - parent->children = child_task->next_child; - else - { - /* We access task->children in GOMP_taskwait - outside of the task lock mutex region, so - need a release barrier here to ensure memory - written by child_task->fn above is flushed - before the NULL is written. */ - __atomic_store_n (&parent->children, NULL, - MEMMODEL_RELEASE); - if (parent->in_taskwait) - gomp_sem_post (&parent->taskwait_sem); - } - } - } + finish_cancelled:; + size_t new_tasks + = gomp_task_run_post_handle_depend (child_task, team); + gomp_task_run_post_remove_parent (child_task); gomp_clear_parent (child_task->children); + gomp_task_run_post_remove_taskgroup (child_task); to_free = child_task; child_task = NULL; - team->task_running_count--; + if (!cancelled) + team->task_running_count--; + if (new_tasks > 1) + { + do_wake = team->nthreads - team->task_running_count; + if (do_wake > new_tasks) + do_wake = new_tasks; + } if (--team->task_count == 0 && gomp_team_barrier_waiting_for_tasks (&team->barrier)) { @@ -304,9 +736,10 @@ GOMP_taskwait (void) struct gomp_task *task = thr->task; struct gomp_task *child_task = NULL; struct gomp_task *to_free = NULL; + int do_wake = 0; /* The acquire barrier on load of task->children here synchronizes - with the write of a NULL in gomp_barrier_handle_tasks. It is + with the write of a NULL in gomp_task_run_post_remove_parent. 
It is not necessary that we synchronize with other non-NULL writes at this point, but we must ensure that all writes to memory by a child thread task work function are seen before we exit from @@ -318,6 +751,7 @@ GOMP_taskwait (void) gomp_mutex_lock (&team->task_lock); while (1) { + bool cancelled = false; if (task->children == NULL) { gomp_mutex_unlock (&team->task_lock); @@ -331,26 +765,30 @@ GOMP_taskwait (void) if (task->children->kind == GOMP_TASK_WAITING) { child_task = task->children; - task->children = child_task->next_child; - child_task->prev_queue->next_queue = child_task->next_queue; - child_task->next_queue->prev_queue = child_task->prev_queue; - if (team->task_queue == child_task) + cancelled + = gomp_task_run_pre (child_task, task, child_task->taskgroup, + team); + if (__builtin_expect (cancelled, 0)) { - if (child_task->next_queue != child_task) - team->task_queue = child_task->next_queue; - else - team->task_queue = NULL; + if (to_free) + { + gomp_finish_task (to_free); + free (to_free); + to_free = NULL; + } + goto finish_cancelled; } - child_task->kind = GOMP_TASK_TIED; - team->task_running_count++; - if (team->task_count == team->task_running_count) - gomp_team_barrier_clear_task_pending (&team->barrier); } else /* All tasks we are waiting for are already running in other threads. Wait for them. 
*/ task->in_taskwait = true; gomp_mutex_unlock (&team->task_lock); + if (do_wake) + { + gomp_team_barrier_wake (&team->barrier, do_wake); + do_wake = 0; + } if (to_free) { gomp_finish_task (to_free); @@ -364,14 +802,13 @@ GOMP_taskwait (void) thr->task = task; } else - { - gomp_sem_wait (&task->taskwait_sem); - task->in_taskwait = false; - return; - } + gomp_sem_wait (&task->taskwait_sem); gomp_mutex_lock (&team->task_lock); if (child_task) { + finish_cancelled:; + size_t new_tasks + = gomp_task_run_post_handle_depend (child_task, team); child_task->prev_child->next_child = child_task->next_child; child_task->next_child->prev_child = child_task->prev_child; if (task->children == child_task) @@ -382,10 +819,17 @@ GOMP_taskwait (void) task->children = NULL; } gomp_clear_parent (child_task->children); + gomp_task_run_post_remove_taskgroup (child_task); to_free = child_task; child_task = NULL; team->task_count--; - team->task_running_count--; + if (new_tasks > 1) + { + do_wake = team->nthreads - team->task_running_count + - !task->in_tied_task; + if (do_wake > new_tasks) + do_wake = new_tasks; + } } } } @@ -398,6 +842,153 @@ GOMP_taskyield (void) /* Nothing at the moment. */ } +void +GOMP_taskgroup_start (void) +{ + struct gomp_thread *thr = gomp_thread (); + struct gomp_team *team = thr->ts.team; + struct gomp_task *task = thr->task; + struct gomp_taskgroup *taskgroup; + + /* If team is NULL, all tasks are executed as + GOMP_TASK_IFFALSE tasks and thus all children tasks of + taskgroup and their descendant tasks will be finished + by the time GOMP_taskgroup_end is called. 
*/ + if (team == NULL) + return; + taskgroup = gomp_malloc (sizeof (struct gomp_taskgroup)); + taskgroup->prev = task->taskgroup; + taskgroup->children = NULL; + taskgroup->in_taskgroup_wait = false; + taskgroup->cancelled = false; + taskgroup->num_children = 0; + gomp_sem_init (&taskgroup->taskgroup_sem, 0); + task->taskgroup = taskgroup; +} + +void +GOMP_taskgroup_end (void) +{ + struct gomp_thread *thr = gomp_thread (); + struct gomp_team *team = thr->ts.team; + struct gomp_task *task = thr->task; + struct gomp_taskgroup *taskgroup; + struct gomp_task *child_task = NULL; + struct gomp_task *to_free = NULL; + int do_wake = 0; + + if (team == NULL) + return; + taskgroup = task->taskgroup; + + /* The acquire barrier on load of taskgroup->num_children here + synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup. + It is not necessary that we synchronize with other non-0 writes at + this point, but we must ensure that all writes to memory by a + child thread task work function are seen before we exit from + GOMP_taskgroup_end. */ + if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0) + goto finish; + + gomp_mutex_lock (&team->task_lock); + while (1) + { + bool cancelled = false; + if (taskgroup->children == NULL) + { + if (taskgroup->num_children) + goto do_wait; + gomp_mutex_unlock (&team->task_lock); + if (to_free) + { + gomp_finish_task (to_free); + free (to_free); + } + goto finish; + } + if (taskgroup->children->kind == GOMP_TASK_WAITING) + { + child_task = taskgroup->children; + cancelled + = gomp_task_run_pre (child_task, child_task->parent, taskgroup, + team); + if (__builtin_expect (cancelled, 0)) + { + if (to_free) + { + gomp_finish_task (to_free); + free (to_free); + to_free = NULL; + } + goto finish_cancelled; + } + } + else + { + do_wait: + /* All tasks we are waiting for are already running + in other threads. Wait for them. 
*/ + taskgroup->in_taskgroup_wait = true; + } + gomp_mutex_unlock (&team->task_lock); + if (do_wake) + { + gomp_team_barrier_wake (&team->barrier, do_wake); + do_wake = 0; + } + if (to_free) + { + gomp_finish_task (to_free); + free (to_free); + to_free = NULL; + } + if (child_task) + { + thr->task = child_task; + child_task->fn (child_task->fn_data); + thr->task = task; + } + else + gomp_sem_wait (&taskgroup->taskgroup_sem); + gomp_mutex_lock (&team->task_lock); + if (child_task) + { + finish_cancelled:; + size_t new_tasks + = gomp_task_run_post_handle_depend (child_task, team); + child_task->prev_taskgroup->next_taskgroup + = child_task->next_taskgroup; + child_task->next_taskgroup->prev_taskgroup + = child_task->prev_taskgroup; + --taskgroup->num_children; + if (taskgroup->children == child_task) + { + if (child_task->next_taskgroup != child_task) + taskgroup->children = child_task->next_taskgroup; + else + taskgroup->children = NULL; + } + gomp_task_run_post_remove_parent (child_task); + gomp_clear_parent (child_task->children); + to_free = child_task; + child_task = NULL; + team->task_count--; + if (new_tasks > 1) + { + do_wake = team->nthreads - team->task_running_count + - !task->in_tied_task; + if (do_wake > new_tasks) + do_wake = new_tasks; + } + } + } + + finish: + task->taskgroup = taskgroup->prev; + gomp_sem_destroy (&taskgroup->taskgroup_sem); + free (taskgroup); +} + int omp_in_final (void) { diff --git a/libgomp/team.c b/libgomp/team.c index 243aa9a..f4c47f7 100644 --- a/libgomp/team.c +++ b/libgomp/team.c @@ -53,6 +53,7 @@ struct gomp_thread_start_data struct gomp_team_state ts; struct gomp_task *task; struct gomp_thread_pool *thread_pool; + unsigned int place; bool nested; }; @@ -84,6 +85,7 @@ gomp_thread_start (void *xdata) thr->thread_pool = data->thread_pool; thr->ts = data->ts; thr->task = data->task; + thr->place = data->place; thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release; @@ -98,7 +100,7 @@ gomp_thread_start (void *xdata) 
gomp_barrier_wait (&team->barrier); local_fn (local_data); - gomp_team_barrier_wait (&team->barrier); + gomp_team_barrier_wait_final (&team->barrier); gomp_finish_task (task); gomp_barrier_wait_last (&team->barrier); } @@ -113,7 +115,7 @@ gomp_thread_start (void *xdata) struct gomp_task *task = thr->task; local_fn (local_data); - gomp_team_barrier_wait (&team->barrier); + gomp_team_barrier_wait_final (&team->barrier); gomp_finish_task (task); gomp_barrier_wait (&pool->threads_dock); @@ -126,6 +128,8 @@ gomp_thread_start (void *xdata) } gomp_sem_destroy (&thr->release); + thr->thread_pool = NULL; + thr->task = NULL; return NULL; } @@ -149,6 +153,7 @@ gomp_new_team (unsigned nthreads) #else gomp_mutex_init (&team->work_share_list_free_lock); #endif + team->work_shares_to_free = &team->work_shares[0]; gomp_init_work_share (&team->work_shares[0], false, nthreads); team->work_shares[0].next_alloc = NULL; team->work_share_list_free = NULL; @@ -167,7 +172,10 @@ gomp_new_team (unsigned nthreads) gomp_mutex_init (&team->task_lock); team->task_queue = NULL; team->task_count = 0; + team->task_queued_count = 0; team->task_running_count = 0; + team->work_share_cancelled = 0; + team->team_cancelled = 0; return team; } @@ -199,16 +207,19 @@ static struct gomp_thread_pool *gomp_new_thread_pool (void) static void gomp_free_pool_helper (void *thread_pool) { + struct gomp_thread *thr = gomp_thread (); struct gomp_thread_pool *pool = (struct gomp_thread_pool *) thread_pool; gomp_barrier_wait_last (&pool->threads_dock); - gomp_sem_destroy (&gomp_thread ()->release); + gomp_sem_destroy (&thr->release); + thr->thread_pool = NULL; + thr->task = NULL; pthread_exit (NULL); } /* Free a thread pool and release its threads. 
*/ -static void +void gomp_free_thread (void *arg __attribute__((unused))) { struct gomp_thread *thr = gomp_thread (); @@ -236,9 +247,9 @@ gomp_free_thread (void *arg __attribute__((unused))) __sync_fetch_and_add (&gomp_managed_threads, 1L - pool->threads_used); #else - gomp_mutex_lock (&gomp_remaining_threads_lock); + gomp_mutex_lock (&gomp_managed_threads_lock); gomp_managed_threads -= pool->threads_used - 1L; - gomp_mutex_unlock (&gomp_remaining_threads_lock); + gomp_mutex_unlock (&gomp_managed_threads_lock); #endif } free (pool->threads); @@ -259,7 +270,7 @@ gomp_free_thread (void *arg __attribute__((unused))) void gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads, - struct gomp_team *team) + unsigned flags, struct gomp_team *team) { struct gomp_thread_start_data *start_data; struct gomp_thread *thr, *nthr; @@ -270,17 +281,24 @@ gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads, unsigned i, n, old_threads_used = 0; pthread_attr_t thread_attr, *attr; unsigned long nthreads_var; + char bind, bind_var; + unsigned int s = 0, rest = 0, p = 0, k = 0; + unsigned int affinity_count = 0; + struct gomp_thread **affinity_thr = NULL; thr = gomp_thread (); nested = thr->ts.team != NULL; if (__builtin_expect (thr->thread_pool == NULL, 0)) { thr->thread_pool = gomp_new_thread_pool (); + thr->thread_pool->threads_busy = nthreads; pthread_setspecific (gomp_thread_destructor, thr); } pool = thr->thread_pool; task = thr->task; icv = task ? &task->icv : &gomp_global_icv; + if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0) + gomp_init_affinity (); /* Always save the previous state, even if this isn't a nested team. 
In particular, we should save any work share state from an outer @@ -303,14 +321,95 @@ gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads, if (__builtin_expect (gomp_nthreads_var_list != NULL, 0) && thr->ts.level < gomp_nthreads_var_list_len) nthreads_var = gomp_nthreads_var_list[thr->ts.level]; + bind_var = icv->bind_var; + if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false) + bind_var = flags & 7; + bind = bind_var; + if (__builtin_expect (gomp_bind_var_list != NULL, 0) + && thr->ts.level < gomp_bind_var_list_len) + bind_var = gomp_bind_var_list[thr->ts.level]; gomp_init_task (thr->task, task, icv); team->implicit_task[0].icv.nthreads_var = nthreads_var; + team->implicit_task[0].icv.bind_var = bind_var; if (nthreads == 1) return; i = 1; + if (__builtin_expect (gomp_places_list != NULL, 0)) + { + if (bind == omp_proc_bind_false) + bind = omp_proc_bind_true; + /* Depending on chosen proc_bind model, set subpartition + for the master thread and initialize helper variables + P and optionally S, K and/or REST used by later place + computation for each additional thread. */ + p = thr->place - 1; + switch (bind) + { + case omp_proc_bind_false: + bind = omp_proc_bind_true; + /* FALLTHRU */ + case omp_proc_bind_true: + case omp_proc_bind_close: + if (nthreads > thr->ts.place_partition_len) + { + /* T > P. S threads will be placed in each place, + and the final REST threads placed one by one + into the already occupied places. */ + s = nthreads / thr->ts.place_partition_len; + rest = nthreads % thr->ts.place_partition_len; + } + else + s = 1; + k = 1; + break; + case omp_proc_bind_master: + /* Each thread will be bound to master's place. */ + break; + case omp_proc_bind_spread: + if (nthreads <= thr->ts.place_partition_len) + { + /* T <= P. 
Each subpartition will have in between s + and s+1 places (subpartitions starting at or + after rest will have s places, earlier s+1 places), + each thread will be bound to the first place in + its subpartition (except for the master thread + that can be bound to another place in its + subpartition). */ + s = thr->ts.place_partition_len / nthreads; + rest = thr->ts.place_partition_len % nthreads; + rest = (s + 1) * rest + thr->ts.place_partition_off; + if (p < rest) + { + p -= (p - thr->ts.place_partition_off) % (s + 1); + thr->ts.place_partition_len = s + 1; + } + else + { + p -= (p - rest) % s; + thr->ts.place_partition_len = s; + } + thr->ts.place_partition_off = p; + } + else + { + /* T > P. Each subpartition will have just a single + place and we'll place between s and s+1 + threads into each subpartition. */ + s = nthreads / thr->ts.place_partition_len; + rest = nthreads % thr->ts.place_partition_len; + thr->ts.place_partition_off = p; + thr->ts.place_partition_len = 1; + k = 1; + } + break; + } + } + else + bind = omp_proc_bind_false; + /* We only allow the reuse of idle threads for non-nested PARALLEL regions. This appears to be implied by the semantics of threadprivate variables, but perhaps that's reading too much into @@ -341,47 +440,244 @@ gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads, team will exit. */ pool->threads_used = nthreads; + /* If necessary, expand the size of the gomp_threads array. It is + expected that changes in the number of threads are rare, thus we + make no effort to expand gomp_threads_size geometrically. */ + if (nthreads >= pool->threads_size) + { + pool->threads_size = nthreads + 1; + pool->threads + = gomp_realloc (pool->threads, + pool->threads_size + * sizeof (struct gomp_thread_data *)); + } + /* Release existing idle threads. 
*/ for (; i < n; ++i) { - nthr = pool->threads[i]; + unsigned int place_partition_off = thr->ts.place_partition_off; + unsigned int place_partition_len = thr->ts.place_partition_len; + unsigned int place = 0; + if (__builtin_expect (gomp_places_list != NULL, 0)) + { + switch (bind) + { + case omp_proc_bind_true: + case omp_proc_bind_close: + if (k == s) + { + ++p; + if (p == (team->prev_ts.place_partition_off + + team->prev_ts.place_partition_len)) + p = team->prev_ts.place_partition_off; + k = 1; + if (i == nthreads - rest) + s = 1; + } + else + ++k; + break; + case omp_proc_bind_master: + break; + case omp_proc_bind_spread: + if (k == 0) + { + /* T <= P. */ + if (p < rest) + p += s + 1; + else + p += s; + if (p == (team->prev_ts.place_partition_off + + team->prev_ts.place_partition_len)) + p = team->prev_ts.place_partition_off; + place_partition_off = p; + if (p < rest) + place_partition_len = s + 1; + else + place_partition_len = s; + } + else + { + /* T > P. */ + if (k == s) + { + ++p; + if (p == (team->prev_ts.place_partition_off + + team->prev_ts.place_partition_len)) + p = team->prev_ts.place_partition_off; + k = 1; + if (i == nthreads - rest) + s = 1; + } + else + ++k; + place_partition_off = p; + place_partition_len = 1; + } + break; + } + if (affinity_thr != NULL + || (bind != omp_proc_bind_true + && pool->threads[i]->place != p + 1) + || pool->threads[i]->place <= place_partition_off + || pool->threads[i]->place > (place_partition_off + + place_partition_len)) + { + unsigned int l; + if (affinity_thr == NULL) + { + unsigned int j; + + if (team->prev_ts.place_partition_len > 64) + affinity_thr + = gomp_malloc (team->prev_ts.place_partition_len + * sizeof (struct gomp_thread *)); + else + affinity_thr + = gomp_alloca (team->prev_ts.place_partition_len + * sizeof (struct gomp_thread *)); + memset (affinity_thr, '\0', + team->prev_ts.place_partition_len + * sizeof (struct gomp_thread *)); + for (j = i; j < old_threads_used; j++) + { + if 
(pool->threads[j]->place + > team->prev_ts.place_partition_off + && (pool->threads[j]->place + <= (team->prev_ts.place_partition_off + + team->prev_ts.place_partition_len))) + { + l = pool->threads[j]->place - 1 + - team->prev_ts.place_partition_off; + pool->threads[j]->data = affinity_thr[l]; + affinity_thr[l] = pool->threads[j]; + } + pool->threads[j] = NULL; + } + if (nthreads > old_threads_used) + memset (&pool->threads[old_threads_used], + '\0', ((nthreads - old_threads_used) + * sizeof (struct gomp_thread *))); + n = nthreads; + affinity_count = old_threads_used - i; + } + if (affinity_count == 0) + break; + l = p; + if (affinity_thr[l - team->prev_ts.place_partition_off] + == NULL) + { + if (bind != omp_proc_bind_true) + continue; + for (l = place_partition_off; + l < place_partition_off + place_partition_len; + l++) + if (affinity_thr[l - team->prev_ts.place_partition_off] + != NULL) + break; + if (l == place_partition_off + place_partition_len) + continue; + } + nthr = affinity_thr[l - team->prev_ts.place_partition_off]; + affinity_thr[l - team->prev_ts.place_partition_off] + = (struct gomp_thread *) nthr->data; + affinity_count--; + pool->threads[i] = nthr; + } + else + nthr = pool->threads[i]; + place = p + 1; + } + else + nthr = pool->threads[i]; nthr->ts.team = team; nthr->ts.work_share = &team->work_shares[0]; nthr->ts.last_work_share = NULL; nthr->ts.team_id = i; nthr->ts.level = team->prev_ts.level + 1; nthr->ts.active_level = thr->ts.active_level; + nthr->ts.place_partition_off = place_partition_off; + nthr->ts.place_partition_len = place_partition_len; #ifdef HAVE_SYNC_BUILTINS nthr->ts.single_count = 0; #endif nthr->ts.static_trip = 0; nthr->task = &team->implicit_task[i]; + nthr->place = place; gomp_init_task (nthr->task, task, icv); team->implicit_task[i].icv.nthreads_var = nthreads_var; + team->implicit_task[i].icv.bind_var = bind_var; nthr->fn = fn; nthr->data = data; team->ordered_release[i] = &nthr->release; } + if (__builtin_expect 
(affinity_thr != NULL, 0)) + { + /* If AFFINITY_THR is non-NULL just because we had to + permute some threads in the pool, but we've managed + to find exactly as many old threads as we'd find + without affinity, we don't need to handle this + specially anymore. */ + if (nthreads <= old_threads_used + ? (affinity_count == old_threads_used - nthreads) + : (i == old_threads_used)) + { + if (team->prev_ts.place_partition_len > 64) + free (affinity_thr); + affinity_thr = NULL; + affinity_count = 0; + } + else + { + i = 1; + /* We are going to compute the places/subpartitions + again from the beginning. So, we need to reinitialize + vars modified by the switch (bind) above inside + of the loop, to the state they had after the initial + switch (bind). */ + switch (bind) + { + case omp_proc_bind_true: + case omp_proc_bind_close: + if (nthreads > thr->ts.place_partition_len) + /* T > P. S has been changed, so needs + to be recomputed. */ + s = nthreads / thr->ts.place_partition_len; + k = 1; + p = thr->place - 1; + break; + case omp_proc_bind_master: + /* No vars have been changed. */ + break; + case omp_proc_bind_spread: + p = thr->ts.place_partition_off; + if (k != 0) + { + /* T > P. */ + s = nthreads / team->prev_ts.place_partition_len; + k = 1; + } + break; + } + + /* Increase the barrier threshold to make sure all new + threads and all the threads we're going to let die + arrive before the team is released. */ + if (affinity_count) + gomp_barrier_reinit (&pool->threads_dock, + nthreads + affinity_count); + } + } + if (i == nthreads) goto do_release; - /* If necessary, expand the size of the gomp_threads array. It is - expected that changes in the number of threads are rare, thus we - make no effort to expand gomp_threads_size geometrically. 
*/ - if (nthreads >= pool->threads_size) - { - pool->threads_size = nthreads + 1; - pool->threads - = gomp_realloc (pool->threads, - pool->threads_size - * sizeof (struct gomp_thread_data *)); - } } - if (__builtin_expect (nthreads > old_threads_used, 0)) + if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0)) { - long diff = (long) nthreads - (long) old_threads_used; + long diff = (long) (nthreads + affinity_count) - (long) old_threads_used; if (old_threads_used == 0) --diff; @@ -389,14 +685,14 @@ gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads, #ifdef HAVE_SYNC_BUILTINS __sync_fetch_and_add (&gomp_managed_threads, diff); #else - gomp_mutex_lock (&gomp_remaining_threads_lock); + gomp_mutex_lock (&gomp_managed_threads_lock); gomp_managed_threads += diff; - gomp_mutex_unlock (&gomp_remaining_threads_lock); + gomp_mutex_unlock (&gomp_managed_threads_lock); #endif } attr = &gomp_thread_attr; - if (__builtin_expect (gomp_cpu_affinity != NULL, 0)) + if (__builtin_expect (gomp_places_list != NULL, 0)) { size_t stacksize; pthread_attr_init (&thread_attr); @@ -410,11 +706,78 @@ gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads, * (nthreads-i)); /* Launch new threads. */ - for (; i < nthreads; ++i, ++start_data) + for (; i < nthreads; ++i) { pthread_t pt; int err; + start_data->ts.place_partition_off = thr->ts.place_partition_off; + start_data->ts.place_partition_len = thr->ts.place_partition_len; + start_data->place = 0; + if (__builtin_expect (gomp_places_list != NULL, 0)) + { + switch (bind) + { + case omp_proc_bind_true: + case omp_proc_bind_close: + if (k == s) + { + ++p; + if (p == (team->prev_ts.place_partition_off + + team->prev_ts.place_partition_len)) + p = team->prev_ts.place_partition_off; + k = 1; + if (i == nthreads - rest) + s = 1; + } + else + ++k; + break; + case omp_proc_bind_master: + break; + case omp_proc_bind_spread: + if (k == 0) + { + /* T <= P. 
*/ + if (p < rest) + p += s + 1; + else + p += s; + if (p == (team->prev_ts.place_partition_off + + team->prev_ts.place_partition_len)) + p = team->prev_ts.place_partition_off; + start_data->ts.place_partition_off = p; + if (p < rest) + start_data->ts.place_partition_len = s + 1; + else + start_data->ts.place_partition_len = s; + } + else + { + /* T > P. */ + if (k == s) + { + ++p; + if (p == (team->prev_ts.place_partition_off + + team->prev_ts.place_partition_len)) + p = team->prev_ts.place_partition_off; + k = 1; + if (i == nthreads - rest) + s = 1; + } + else + ++k; + start_data->ts.place_partition_off = p; + start_data->ts.place_partition_len = 1; + } + break; + } + start_data->place = p + 1; + if (affinity_thr != NULL && pool->threads[i] != NULL) + continue; + gomp_init_thread_affinity (attr, p); + } + start_data->fn = fn; start_data->fn_data = data; start_data->ts.team = team; @@ -430,18 +793,16 @@ gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads, start_data->task = &team->implicit_task[i]; gomp_init_task (start_data->task, task, icv); team->implicit_task[i].icv.nthreads_var = nthreads_var; + team->implicit_task[i].icv.bind_var = bind_var; start_data->thread_pool = pool; start_data->nested = nested; - if (gomp_cpu_affinity != NULL) - gomp_init_thread_affinity (attr); - - err = pthread_create (&pt, attr, gomp_thread_start, start_data); + err = pthread_create (&pt, attr, gomp_thread_start, start_data++); if (err != 0) gomp_fatal ("Thread creation failed: %s", strerror (err)); } - if (__builtin_expect (gomp_cpu_affinity != NULL, 0)) + if (__builtin_expect (gomp_places_list != NULL, 0)) pthread_attr_destroy (&thread_attr); do_release: @@ -450,21 +811,32 @@ gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads, /* Decrease the barrier threshold to match the number of threads that should arrive back at the end of this team. The extra threads should be exiting. Note that we arrange for this test - to never be true for nested teams. 
*/ - if (__builtin_expect (nthreads < old_threads_used, 0)) + to never be true for nested teams. If AFFINITY_COUNT is non-zero, + the barrier as well as gomp_managed_threads was temporarily + set to NTHREADS + AFFINITY_COUNT. For NTHREADS < OLD_THREADS_USED, + AFFINITY_COUNT if non-zero will be always at least + OLD_THREADS_USED - NTHREADS. */ + if (__builtin_expect (nthreads < old_threads_used, 0) + || __builtin_expect (affinity_count, 0)) { long diff = (long) nthreads - (long) old_threads_used; + if (affinity_count) + diff = -affinity_count; + gomp_barrier_reinit (&pool->threads_dock, nthreads); #ifdef HAVE_SYNC_BUILTINS __sync_fetch_and_add (&gomp_managed_threads, diff); #else - gomp_mutex_lock (&gomp_remaining_threads_lock); + gomp_mutex_lock (&gomp_managed_threads_lock); gomp_managed_threads += diff; - gomp_mutex_unlock (&gomp_remaining_threads_lock); + gomp_mutex_unlock (&gomp_managed_threads_lock); #endif } + if (__builtin_expect (affinity_thr != NULL, 0) + && team->prev_ts.place_partition_len > 64) + free (affinity_thr); } @@ -477,9 +849,26 @@ gomp_team_end (void) struct gomp_thread *thr = gomp_thread (); struct gomp_team *team = thr->ts.team; - /* This barrier handles all pending explicit threads. */ - gomp_team_barrier_wait (&team->barrier); - gomp_fini_work_share (thr->ts.work_share); + /* This barrier handles all pending explicit threads. + As #pragma omp cancel parallel might get awaited count in + team->barrier in an inconsistent state, we need to use a different + counter here. 
*/ + gomp_team_barrier_wait_final (&team->barrier); + if (__builtin_expect (team->team_cancelled, 0)) + { + struct gomp_work_share *ws = team->work_shares_to_free; + do + { + struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws); + if (next_ws == NULL) + gomp_ptrlock_set (&ws->next_ws, ws); + gomp_fini_work_share (ws); + ws = next_ws; + } + while (ws != NULL); + } + else + gomp_fini_work_share (thr->ts.work_share); gomp_end_task (); thr->ts = team->prev_ts; @@ -489,9 +878,9 @@ gomp_team_end (void) #ifdef HAVE_SYNC_BUILTINS __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads); #else - gomp_mutex_lock (&gomp_remaining_threads_lock); + gomp_mutex_lock (&gomp_managed_threads_lock); gomp_managed_threads -= team->nthreads - 1L; - gomp_mutex_unlock (&gomp_remaining_threads_lock); + gomp_mutex_unlock (&gomp_managed_threads_lock); #endif /* This barrier has gomp_barrier_wait_last counterparts and ensures the team can be safely destroyed. */ @@ -532,8 +921,6 @@ gomp_team_end (void) static void __attribute__((constructor)) initialize_team (void) { - struct gomp_thread *thr; - #ifndef HAVE_TLS static struct gomp_thread initial_thread_tls_data; @@ -543,13 +930,6 @@ initialize_team (void) if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0) gomp_fatal ("could not create thread pool destructor."); - -#ifdef HAVE_TLS - thr = &gomp_tls_data; -#else - thr = &initial_thread_tls_data; -#endif - gomp_sem_init (&thr->release, 0); } static void __attribute__((destructor)) diff --git a/libgomp/testsuite/libgomp.c++/affinity-1.C b/libgomp/testsuite/libgomp.c++/affinity-1.C new file mode 100644 index 0000000..d20b392 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/affinity-1.C @@ -0,0 +1,4 @@ +// { dg-do run } +// { dg-set-target-env-var OMP_PROC_BIND "true" } + +#include "../libgomp.c/affinity-1.c" diff --git a/libgomp/testsuite/libgomp.c++/atomic-10.C b/libgomp/testsuite/libgomp.c++/atomic-10.C new file mode 100644 index 0000000..2145f28 --- 
/dev/null +++ b/libgomp/testsuite/libgomp.c++/atomic-10.C @@ -0,0 +1,99 @@ +// { dg-do run } + +extern "C" void abort (void); +int x = 6; + +int +main () +{ + int v, l = 2, s = 1; + #pragma omp atomic + x = -3 + x; + #pragma omp atomic read + v = x; + if (v != 3) + abort (); + #pragma omp atomic update + x = 3 * 2 * 1 + x; + #pragma omp atomic read + v = x; + if (v != 9) + abort (); + #pragma omp atomic capture + v = x = x | 16; + if (v != 25) + abort (); + #pragma omp atomic capture + v = x = x + 14 * 2 / 4; + if (v != 32) + abort (); + #pragma omp atomic capture + v = x = 5 | x; + if (v != 37) + abort (); + #pragma omp atomic capture + v = x = 40 + 12 - 2 - 7 - x; + if (v != 6) + abort (); + #pragma omp atomic read + v = x; + if (v != 6) + abort (); + #pragma omp atomic capture + { v = x; x = 3 + x; } + if (v != 6) + abort (); + #pragma omp atomic capture + { v = x; x = -1 * -1 * -1 * -1 - x; } + if (v != 9) + abort (); + #pragma omp atomic read + v = x; + if (v != -8) + abort (); + #pragma omp atomic capture + { x = 2 * 2 - x; v = x; } + if (v != 12) + abort (); + #pragma omp atomic capture + { x = 7 & x; v = x; } + if (v != 4) + abort (); + #pragma omp atomic capture + { v = x; x = 6; } + if (v != 4) + abort (); + #pragma omp atomic read + v = x; + if (v != 6) + abort (); + #pragma omp atomic capture + { v = x; x = 7 * 8 + 23; } + if (v != 6) + abort (); + #pragma omp atomic read + v = x; + if (v != 79) + abort (); + #pragma omp atomic capture + { v = x; x = 23 + 6 * 4; } + if (v != 79) + abort (); + #pragma omp atomic read + v = x; + if (v != 47) + abort (); + #pragma omp atomic capture + { v = x; x = l ? 
17 : 12; } + if (v != 47) + abort (); + #pragma omp atomic capture + { v = x; x = l = s++ + 3; } + if (v != 17 || l != 4 || s != 2) + abort (); + #pragma omp atomic read + v = x; + if (v != 4) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/atomic-11.C b/libgomp/testsuite/libgomp.c++/atomic-11.C new file mode 100644 index 0000000..c7101e0 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/atomic-11.C @@ -0,0 +1,108 @@ +// { dg-do run } + +extern "C" void abort (void); + +template <typename T> +void +foo () +{ + extern T x; + T v, l = 2, s = 1; + #pragma omp atomic + x = -3 + x; + #pragma omp atomic read + v = x; + if (v != 3) + abort (); + #pragma omp atomic update + x = 3 * 2 * 1 + x; + #pragma omp atomic read + v = x; + if (v != 9) + abort (); + #pragma omp atomic capture + v = x = x | 16; + if (v != 25) + abort (); + #pragma omp atomic capture + v = x = x + 14 * 2 / 4; + if (v != 32) + abort (); + #pragma omp atomic capture + v = x = 5 | x; + if (v != 37) + abort (); + #pragma omp atomic capture + v = x = 40 + 12 - 2 - 7 - x; + if (v != 6) + abort (); + #pragma omp atomic read + v = x; + if (v != 6) + abort (); + #pragma omp atomic capture + { v = x; x = 3 + x; } + if (v != 6) + abort (); + #pragma omp atomic capture + { v = x; x = -1 * -1 * -1 * -1 - x; } + if (v != 9) + abort (); + #pragma omp atomic read + v = x; + if (v != -8) + abort (); + #pragma omp atomic capture + { x = 2 * 2 - x; v = x; } + if (v != 12) + abort (); + #pragma omp atomic capture + { x = 7 & x; v = x; } + if (v != 4) + abort (); + #pragma omp atomic capture + { v = x; x = 6; } + if (v != 4) + abort (); + #pragma omp atomic read + v = x; + if (v != 6) + abort (); + #pragma omp atomic capture + { v = x; x = 7 * 8 + 23; } + if (v != 6) + abort (); + #pragma omp atomic read + v = x; + if (v != 79) + abort (); + #pragma omp atomic capture + { v = x; x = 23 + 6 * 4; } + if (v != 79) + abort (); + #pragma omp atomic read + v = x; + if (v != 47) + abort (); + #pragma omp 
atomic capture + { v = x; x = l ? 17 : 12; } + if (v != 47) + abort (); + #pragma omp atomic capture + { v = x; x = l = s++ + 3; } + if (v != 17 || l != 4 || s != 2) + abort (); + #pragma omp atomic read + v = x; + if (v != 4) + abort (); +} + +int x = 6; + +int +main () +{ + foo <int> (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/atomic-12.C b/libgomp/testsuite/libgomp.c++/atomic-12.C new file mode 100644 index 0000000..d1ae9d8 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/atomic-12.C @@ -0,0 +1,58 @@ +// { dg-do run } + +extern "C" void abort (); +int x = 6, cnt; + +int +foo () +{ + return cnt++; +} + +int +main () +{ + int v, *p; + p = &x; + #pragma omp atomic update + p[foo (), 0] = 16 + 6 - p[foo (), 0]; + #pragma omp atomic read + v = x; + if (cnt != 2 || v != 16) + abort (); + #pragma omp atomic capture + v = p[foo () + foo (), 0] = p[foo () + foo (), 0] + 3; + if (cnt != 6 || v != 19) + abort (); + #pragma omp atomic capture + v = p[foo (), 0] = 12 * 1 / 2 + (foo (), 0) + p[foo (), 0]; + if (cnt != 9 || v != 25) + abort (); + #pragma omp atomic capture + { + v = p[foo () & 0]; p[foo () & 0] = (foo (), 1) * 9 - p[foo () & 0]; + } + if (cnt != 13 || v != 25) + abort (); + #pragma omp atomic read + v = x; + if (v != -16) + abort (); + #pragma omp atomic capture + { + p[0 & foo ()] = 16 - 2 + 3 + p[0 & foo ()]; v = p[0 & foo ()]; + } + if (cnt != 16 || v != 1) + abort (); + #pragma omp atomic capture + { + v = p[foo (), 0]; p[foo (), 0] = (foo (), 7) ? 
13 : foo () + 6; + } + if (cnt != 19 || v != 1) + abort (); + #pragma omp atomic read + v = x; + if (v != 13) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/atomic-13.C b/libgomp/testsuite/libgomp.c++/atomic-13.C new file mode 100644 index 0000000..0569d1c --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/atomic-13.C @@ -0,0 +1,68 @@ +// { dg-do run } + +extern "C" void abort (); +int cnt; + +int +foo () +{ + return cnt++; +} + +template <typename T> +void +bar () +{ + extern T x; + T v, *p; + p = &x; + #pragma omp atomic update + p[foo (), 0] = 16 + 6 - p[foo (), 0]; + #pragma omp atomic read + v = x; + if (cnt != 2 || v != 16) + abort (); + #pragma omp atomic capture + v = p[foo () + foo (), 0] = p[foo () + foo (), 0] + 3; + if (cnt != 6 || v != 19) + abort (); + #pragma omp atomic capture + v = p[foo (), 0] = 12 * 1 / 2 + (foo (), 0) + p[foo (), 0]; + if (cnt != 9 || v != 25) + abort (); + #pragma omp atomic capture + { + v = p[foo () & 0]; p[foo () & 0] = (foo (), 1) * 9 - p[foo () & 0]; + } + if (cnt != 13 || v != 25) + abort (); + #pragma omp atomic read + v = x; + if (v != -16) + abort (); + #pragma omp atomic capture + { + p[0 & foo ()] = 16 - 2 + 3 + p[0 & foo ()]; v = p[0 & foo ()]; + } + if (cnt != 16 || v != 1) + abort (); + #pragma omp atomic capture + { + v = p[foo (), 0]; p[foo (), 0] = (foo (), 7) ? 
13 : foo () + 6; + } + if (cnt != 19 || v != 1) + abort (); + #pragma omp atomic read + v = x; + if (v != 13) + abort (); +} + +int x = 6; + +int +main () +{ + bar <int> (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/atomic-14.C b/libgomp/testsuite/libgomp.c++/atomic-14.C new file mode 100644 index 0000000..4cd9df8 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/atomic-14.C @@ -0,0 +1,99 @@ +// { dg-do run } + +extern "C" void abort (void); +int x = 6; + +int +main () +{ + int v, l = 2, s = 1; + #pragma omp atomic seq_cst + x = -3 + x; + #pragma omp atomic read seq_cst + v = x; + if (v != 3) + abort (); + #pragma omp atomic update seq_cst + x = 3 * 2 * 1 + x; + #pragma omp atomic read seq_cst + v = x; + if (v != 9) + abort (); + #pragma omp atomic capture seq_cst + v = x = x | 16; + if (v != 25) + abort (); + #pragma omp atomic capture seq_cst + v = x = x + 14 * 2 / 4; + if (v != 32) + abort (); + #pragma omp atomic capture seq_cst + v = x = 5 | x; + if (v != 37) + abort (); + #pragma omp atomic capture seq_cst + v = x = 40 + 12 - 2 - 7 - x; + if (v != 6) + abort (); + #pragma omp atomic read seq_cst + v = x; + if (v != 6) + abort (); + #pragma omp atomic capture seq_cst + { v = x; x = 3 + x; } + if (v != 6) + abort (); + #pragma omp atomic capture seq_cst + { v = x; x = -1 * -1 * -1 * -1 - x; } + if (v != 9) + abort (); + #pragma omp atomic read seq_cst + v = x; + if (v != -8) + abort (); + #pragma omp atomic capture seq_cst + { x = 2 * 2 - x; v = x; } + if (v != 12) + abort (); + #pragma omp atomic capture seq_cst + { x = 7 & x; v = x; } + if (v != 4) + abort (); + #pragma omp atomic capture seq_cst + { v = x; x = 6; } + if (v != 4) + abort (); + #pragma omp atomic read seq_cst + v = x; + if (v != 6) + abort (); + #pragma omp atomic capture seq_cst + { v = x; x = 7 * 8 + 23; } + if (v != 6) + abort (); + #pragma omp atomic read seq_cst + v = x; + if (v != 79) + abort (); + #pragma omp atomic capture seq_cst + { v = x; x = 23 + 6 * 4; } + if (v != 
79) + abort (); + #pragma omp atomic read seq_cst + v = x; + if (v != 47) + abort (); + #pragma omp atomic capture seq_cst + { v = x; x = l ? 17 : 12; } + if (v != 47) + abort (); + #pragma omp atomic capture seq_cst + { v = x; x = l = s++ + 3; } + if (v != 17 || l != 4 || s != 2) + abort (); + #pragma omp atomic read seq_cst + v = x; + if (v != 4) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/atomic-15.C b/libgomp/testsuite/libgomp.c++/atomic-15.C new file mode 100644 index 0000000..1eabce7 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/atomic-15.C @@ -0,0 +1,108 @@ +// { dg-do run } + +extern "C" void abort (void); + +template <typename T> +void +foo () +{ + extern T x; + T v, l = 2, s = 1; + #pragma omp atomic seq_cst + x = -3 + x; + #pragma omp atomic read seq_cst + v = x; + if (v != 3) + abort (); + #pragma omp atomic update seq_cst + x = 3 * 2 * 1 + x; + #pragma omp atomic read seq_cst + v = x; + if (v != 9) + abort (); + #pragma omp atomic capture seq_cst + v = x = x | 16; + if (v != 25) + abort (); + #pragma omp atomic capture seq_cst + v = x = x + 14 * 2 / 4; + if (v != 32) + abort (); + #pragma omp atomic capture seq_cst + v = x = 5 | x; + if (v != 37) + abort (); + #pragma omp atomic capture seq_cst + v = x = 40 + 12 - 2 - 7 - x; + if (v != 6) + abort (); + #pragma omp atomic read seq_cst + v = x; + if (v != 6) + abort (); + #pragma omp atomic capture seq_cst + { v = x; x = 3 + x; } + if (v != 6) + abort (); + #pragma omp atomic capture seq_cst + { v = x; x = -1 * -1 * -1 * -1 - x; } + if (v != 9) + abort (); + #pragma omp atomic read seq_cst + v = x; + if (v != -8) + abort (); + #pragma omp atomic capture seq_cst + { x = 2 * 2 - x; v = x; } + if (v != 12) + abort (); + #pragma omp atomic capture seq_cst + { x = 7 & x; v = x; } + if (v != 4) + abort (); + #pragma omp atomic capture seq_cst + { v = x; x = 6; } + if (v != 4) + abort (); + #pragma omp atomic read seq_cst + v = x; + if (v != 6) + abort (); + #pragma omp atomic 
capture seq_cst + { v = x; x = 7 * 8 + 23; } + if (v != 6) + abort (); + #pragma omp atomic read seq_cst + v = x; + if (v != 79) + abort (); + #pragma omp atomic capture seq_cst + { v = x; x = 23 + 6 * 4; } + if (v != 79) + abort (); + #pragma omp atomic read seq_cst + v = x; + if (v != 47) + abort (); + #pragma omp atomic capture seq_cst + { v = x; x = l ? 17 : 12; } + if (v != 47) + abort (); + #pragma omp atomic capture seq_cst + { v = x; x = l = s++ + 3; } + if (v != 17 || l != 4 || s != 2) + abort (); + #pragma omp atomic read seq_cst + v = x; + if (v != 4) + abort (); +} + +int x = 6; + +int +main () +{ + foo <int> (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/cancel-for-1.C b/libgomp/testsuite/libgomp.c++/cancel-for-1.C new file mode 100644 index 0000000..8183a2d --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/cancel-for-1.C @@ -0,0 +1,29 @@ +// { dg-do run } +// { dg-set-target-env-var OMP_CANCELLATION "true" } + +#include <omp.h> +#include "cancel-test.h" + +int +main () +{ + { + S c; + #pragma omp parallel num_threads (32) + { + S a, b; + int i; + #pragma omp for private (b) firstprivate (c) + for (i = 0; i < 1000; ++i) + { + S d; + #pragma omp cancel for + if (omp_get_cancellation ()) + abort (); + b.bump (); + c.bump (); + } + } + } + S::verify (); +} diff --git a/libgomp/testsuite/libgomp.c++/cancel-for-2.C b/libgomp/testsuite/libgomp.c++/cancel-for-2.C new file mode 100644 index 0000000..1595a23 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/cancel-for-2.C @@ -0,0 +1,126 @@ +// { dg-do run } +// { dg-set-target-env-var OMP_CANCELLATION "true" } + +#include <omp.h> +#include "cancel-test.h" + +__attribute__((noinline, noclone)) int +foo (int *x) +{ + S a, b, c, d, e; + int v = 0, w = 0; + #pragma omp parallel num_threads (32) shared (v, w) private (c, d) firstprivate (e) + { + S g; + int i; + c.bump (); + e.bump (); + #pragma omp for private (d, g) firstprivate (b) + for (i = 0; i < 1000; ++i) + { + b.bump (); + d.bump (); + g.bump 
(); + #pragma omp cancel for if (x[0]) + abort (); + } + #pragma omp for private (d, g) firstprivate (b) + for (i = 0; i < 1000; ++i) + { + b.bump (); + d.bump (); + g.bump (); + #pragma omp cancel for if (x[1]) + #pragma omp atomic + v++; + } + #pragma omp for private (d, g) firstprivate (b) + for (i = 0; i < 1000; ++i) + { + b.bump (); + d.bump (); + g.bump (); + #pragma omp cancel for if (x[2]) + #pragma omp atomic + w += 8; + } + #pragma omp for private (d, g) firstprivate (b) + for (i = 0; i < 1000; ++i) + { + b.bump (); + d.bump (); + g.bump (); + #pragma omp cancel for if (x[3]) + #pragma omp atomic + v += 2; + } + } + if (v != 3000 || w != 0) + abort (); + #pragma omp parallel num_threads (32) shared (v, w) private (c, d) firstprivate (e) + { + S g, h; + int i; + c.bump (); + e.bump (); + /* None of these cancel directives should actually cancel anything, + but the compiler shouldn't know that and thus should use cancellable + barriers at the end of all the workshares. */ + #pragma omp cancel parallel if (omp_get_thread_num () == 1 && x[4]) + #pragma omp for private (d, g) firstprivate (b) + for (i = 0; i < 1000; ++i) + { + b.bump (); + d.bump (); + g.bump (); + #pragma omp cancel for if (x[0]) + abort (); + } + #pragma omp cancel parallel if (omp_get_thread_num () == 2 && x[4]) + #pragma omp for private (d, g) firstprivate (b) + for (i = 0; i < 1000; ++i) + { + b.bump (); + d.bump (); + g.bump (); + #pragma omp cancel for if (x[1]) + #pragma omp atomic + v++; + } + #pragma omp cancel parallel if (omp_get_thread_num () == 3 && x[4]) + #pragma omp for private (d, g) firstprivate (b) + for (i = 0; i < 1000; ++i) + { + b.bump (); + d.bump (); + g.bump (); + #pragma omp cancel for if (x[2]) + #pragma omp atomic + w += 8; + } + #pragma omp cancel parallel if (omp_get_thread_num () == 4 && x[4]) + #pragma omp for private (d, g) firstprivate (b) + for (i = 0; i < 1000; ++i) + { + b.bump (); + d.bump (); + g.bump (); + #pragma omp cancel for if (x[3]) + #pragma omp 
atomic + v += 2; + } + #pragma omp cancel parallel if (omp_get_thread_num () == 5 && x[4]) + } + if (v != 6000 || w != 0) + abort (); + return 0; +} + +int +main () +{ + int x[] = { 1, 0, 1, 0, 0 }; + if (omp_get_cancellation ()) + foo (x); + S::verify (); +} diff --git a/libgomp/testsuite/libgomp.c++/cancel-parallel-1.C b/libgomp/testsuite/libgomp.c++/cancel-parallel-1.C new file mode 100644 index 0000000..033d676 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/cancel-parallel-1.C @@ -0,0 +1,18 @@ +// { dg-do run } +// { dg-set-target-env-var OMP_CANCELLATION "true" } + +#include <omp.h> +#include "cancel-test.h" + +int +main () +{ + #pragma omp parallel num_threads (32) + { + S a; + #pragma omp cancel parallel + if (omp_get_cancellation ()) + abort (); + } + S::verify (); +} diff --git a/libgomp/testsuite/libgomp.c++/cancel-parallel-2.C b/libgomp/testsuite/libgomp.c++/cancel-parallel-2.C new file mode 100644 index 0000000..340423b --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/cancel-parallel-2.C @@ -0,0 +1,57 @@ +// { dg-do run } +// { dg-set-target-env-var OMP_CANCELLATION "true" } + +#include <unistd.h> +#include <omp.h> +#include "cancel-test.h" + +static void +foo (int *x) +{ + S a, b, c; + #pragma omp parallel firstprivate(x, c) num_threads (32) private (b) + { + S d; + b.bump (); + c.bump (); + int thr = omp_get_thread_num (); + switch (x[thr]) + { + case 4: + #pragma omp cancel parallel + break; + case 3: + #pragma omp task + usleep (1000); + #pragma omp task + usleep (2000); + #pragma omp task + usleep (4000); + break; + case 2: + usleep (1000); + /* FALLTHRU */ + case 1: + #pragma omp cancellation point parallel + break; + } + #pragma omp barrier + if (omp_get_cancellation ()) + abort (); + } +} + +int +main () +{ + int i, j, x[32] = { 0, 1, 2, 4, 2, 2, 1, 0 }; + foo (x); + for (i = 0; i < 32; i++) + { + for (j = 0; j < 32; j++) + x[j] = rand () & 3; + x[rand () & 31] = 4; + foo (x); + } + S::verify (); +} diff --git 
a/libgomp/testsuite/libgomp.c++/cancel-parallel-3.C b/libgomp/testsuite/libgomp.c++/cancel-parallel-3.C new file mode 100644 index 0000000..9db7a4f --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/cancel-parallel-3.C @@ -0,0 +1,50 @@ +// { dg-do run } +// { dg-set-target-env-var OMP_CANCELLATION "true" } + +#include <omp.h> +#include <unistd.h> +#include "cancel-test.h" + +static inline void +do_some_work (void) +{ + asm volatile ("" : : : "memory"); +} + +void +foo () +{ + S a, b, c; + omp_set_dynamic (0); + omp_set_schedule (omp_sched_static, 1); + #pragma omp parallel num_threads (16) private (b) firstprivate (c) + { + S d; + int i, j; + b.bump (); + c.bump (); + do_some_work (); + #pragma omp barrier + if (omp_get_thread_num () == 1) + { + sleep (2); + #pragma omp cancellation point parallel + } + for (j = 3; j <= 16; j++) + #pragma omp for schedule (runtime) nowait + for (i = 0; i < j; i++) + do_some_work (); + if (omp_get_thread_num () == 0) + { + sleep (1); + #pragma omp cancel parallel + } + } +} + +int +main () +{ + foo (); + S::verify (); +} diff --git a/libgomp/testsuite/libgomp.c++/cancel-sections-1.C b/libgomp/testsuite/libgomp.c++/cancel-sections-1.C new file mode 100644 index 0000000..81a9c35 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/cancel-sections-1.C @@ -0,0 +1,43 @@ +// { dg-do run } +// { dg-set-target-env-var OMP_CANCELLATION "true" } + +#include <omp.h> +#include "cancel-test.h" + +int +main () +{ + if (!omp_get_cancellation ()) + return 0; + #pragma omp parallel num_threads (32) + { + S a; + #pragma omp sections + { + { + S b; + #pragma omp cancel sections + abort (); + } + #pragma omp section + { + S c; + #pragma omp cancel sections + abort (); + } + #pragma omp section + { + S d; + #pragma omp cancel sections + abort (); + } + #pragma omp section + { + S e; + #pragma omp cancel sections + abort (); + } + } + } + S::verify (); +} diff --git a/libgomp/testsuite/libgomp.c++/cancel-taskgroup-1.C 
b/libgomp/testsuite/libgomp.c++/cancel-taskgroup-1.C new file mode 100644 index 0000000..4f66859 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/cancel-taskgroup-1.C @@ -0,0 +1,4 @@ +// { dg-do run } +// { dg-set-target-env-var OMP_CANCELLATION "true" } + +#include "../libgomp.c/cancel-taskgroup-1.c" diff --git a/libgomp/testsuite/libgomp.c++/cancel-taskgroup-2.C b/libgomp/testsuite/libgomp.c++/cancel-taskgroup-2.C new file mode 100644 index 0000000..d4a02e9 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/cancel-taskgroup-2.C @@ -0,0 +1,4 @@ +// { dg-do run } +// { dg-set-target-env-var OMP_CANCELLATION "true" } + +#include "../libgomp.c/cancel-taskgroup-2.c" diff --git a/libgomp/testsuite/libgomp.c++/cancel-taskgroup-3.C b/libgomp/testsuite/libgomp.c++/cancel-taskgroup-3.C new file mode 100644 index 0000000..c897265 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/cancel-taskgroup-3.C @@ -0,0 +1,58 @@ +// { dg-do run } +// { dg-set-target-env-var OMP_CANCELLATION "true" } + +#include <unistd.h> +#include <omp.h> +#include "cancel-test.h" + +void +foo () +{ + S a, b, c, d, e, f; + #pragma omp parallel private (c, d) firstprivate (e, f) + #pragma omp taskgroup + { + c.bump (); + e.bump (); + #pragma omp task firstprivate (b, f) private (d) + { + S h; + b.bump (); + d.bump (); + f.bump (); + #pragma omp cancel taskgroup + if (omp_get_cancellation ()) + abort (); + } + } + #pragma omp parallel private (c, d) firstprivate (e, f) + { + #pragma omp barrier + #pragma omp single + #pragma omp taskgroup + { + int i; + c.bump (); + e.bump (); + for (i = 0; i < 50; i++) + #pragma omp task firstprivate (b, f) private (d) + { + S h; + b.bump (); + d.bump (); + f.bump (); + #pragma omp cancellation point taskgroup + usleep (30); + #pragma omp cancel taskgroup if (i > 5) + } + } + usleep (10); + } +} + +int +main () +{ + foo (); + S::verify (); +} diff --git a/libgomp/testsuite/libgomp.c++/cancel-test.h b/libgomp/testsuite/libgomp.c++/cancel-test.h new file mode 100644 index 
0000000..776d6ee --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/cancel-test.h @@ -0,0 +1,47 @@ +#include <stdlib.h> +#include <omp.h> + +struct S +{ + static int s; + int v; + S () + { + #pragma omp atomic + s++; + } + + S (int x) + { + #pragma omp atomic + s++; + v = x; + } + + ~S () + { + #pragma omp atomic + s--; + } + + S (const S &x) + { + #pragma omp atomic + s++; + v = x.v; + } + + static void + verify () + { + if (s) abort (); + } + + void + bump () + { + v++; + } +}; + +int S::s = 0; diff --git a/libgomp/testsuite/libgomp.c++/for-10.C b/libgomp/testsuite/libgomp.c++/for-10.C new file mode 100644 index 0000000..fb1a3e9 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/for-10.C @@ -0,0 +1,44 @@ +extern "C" void abort (); + +#define M(x, y, z) O(x, y, z) +#define O(x, y, z) x ## _ ## y ## _ ## z + +#define F simd +#define G simd +#define S +#define N(x) M(x, G, normal) +#include "../libgomp.c/for-2.h" +#undef S +#undef N +#undef F +#undef G + +#define F parallel for simd +#define G pf_simd +#include "../libgomp.c/for-1.h" +#undef F +#undef G + +#define F for simd +#define G f_simd +#include "../libgomp.c/for-1.h" +#undef F +#undef G + +int +main () +{ + if (test_simd_normal () + || test_pf_simd_static () + || test_pf_simd_static32 () + || test_pf_simd_auto () + || test_pf_simd_guided32 () + || test_pf_simd_runtime () + || test_f_simd_static () + || test_f_simd_static32 () + || test_f_simd_auto () + || test_f_simd_guided32 () + || test_f_simd_runtime ()) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/for-11.C b/libgomp/testsuite/libgomp.c++/for-11.C new file mode 100644 index 0000000..0244e4d --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/for-11.C @@ -0,0 +1,108 @@ +extern "C" void abort (); + +#define M(x, y, z) O(x, y, z) +#define O(x, y, z) x ## _ ## y ## _ ## z + +#pragma omp declare target + +#define F distribute +#define G d +#define S +#define N(x) M(x, G, normal) +#include "../libgomp.c/for-2.h" +#undef S +#undef N +#undef 
F +#undef G + +#define F distribute +#define G d_ds128 +#define S dist_schedule(static, 128) +#define N(x) M(x, G, normal) +#include "../libgomp.c/for-2.h" +#undef S +#undef N +#undef F +#undef G + +#define F distribute simd +#define G ds +#define S +#define N(x) M(x, G, normal) +#include "../libgomp.c/for-2.h" +#undef S +#undef N +#undef F +#undef G + +#define F distribute simd +#define G ds_ds128 +#define S dist_schedule(static, 128) +#define N(x) M(x, G, normal) +#include "../libgomp.c/for-2.h" +#undef S +#undef N +#undef F +#undef G + +#define F distribute parallel for +#define G dpf +#include "../libgomp.c/for-1.h" +#undef F +#undef G + +#define F distribute parallel for dist_schedule(static, 128) +#define G dpf_ds128 +#include "../libgomp.c/for-1.h" +#undef F +#undef G + +#define F distribute parallel for simd +#define G dpfs +#include "../libgomp.c/for-1.h" +#undef F +#undef G + +#define F distribute parallel for simd dist_schedule(static, 128) +#define G dpfs_ds128 +#include "../libgomp.c/for-1.h" +#undef F +#undef G + +#pragma omp end declare target + +int +main () +{ + int err = 0; + #pragma omp target teams reduction(|:err) + { + err |= test_d_normal (); + err |= test_d_ds128_normal (); + err |= test_ds_normal (); + err |= test_ds_ds128_normal (); + err |= test_dpf_static (); + err |= test_dpf_static32 (); + err |= test_dpf_auto (); + err |= test_dpf_guided32 (); + err |= test_dpf_runtime (); + err |= test_dpf_ds128_static (); + err |= test_dpf_ds128_static32 (); + err |= test_dpf_ds128_auto (); + err |= test_dpf_ds128_guided32 (); + err |= test_dpf_ds128_runtime (); + err |= test_dpfs_static (); + err |= test_dpfs_static32 (); + err |= test_dpfs_auto (); + err |= test_dpfs_guided32 (); + err |= test_dpfs_runtime (); + err |= test_dpfs_ds128_static (); + err |= test_dpfs_ds128_static32 (); + err |= test_dpfs_ds128_auto (); + err |= test_dpfs_ds128_guided32 (); + err |= test_dpfs_ds128_runtime (); + } + if (err) + abort (); + return 0; +} diff --git 
a/libgomp/testsuite/libgomp.c++/for-9.C b/libgomp/testsuite/libgomp.c++/for-9.C new file mode 100644 index 0000000..86b9d93 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/for-9.C @@ -0,0 +1,33 @@ +extern "C" void abort (); + +#define M(x, y, z) O(x, y, z) +#define O(x, y, z) x ## _ ## y ## _ ## z + +#define F parallel for +#define G pf +#include "../libgomp.c/for-1.h" +#undef F +#undef G + +#define F for +#define G f +#include "../libgomp.c/for-1.h" +#undef F +#undef G + +int +main () +{ + if (test_pf_static () + || test_pf_static32 () + || test_pf_auto () + || test_pf_guided32 () + || test_pf_runtime () + || test_f_static () + || test_f_static32 () + || test_f_auto () + || test_f_guided32 () + || test_f_runtime ()) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/simd-1.C b/libgomp/testsuite/libgomp.c++/simd-1.C new file mode 100644 index 0000000..16ef159 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/simd-1.C @@ -0,0 +1,79 @@ +// { dg-do run } +// { dg-options "-O2" } +// { dg-additional-options "-msse2" { target sse2_runtime } } +// { dg-additional-options "-mavx" { target avx_runtime } } + +extern "C" void abort (); +int a[1024] __attribute__((aligned (32))) = { 1 }; +int b[1024] __attribute__((aligned (32))) = { 1 }; +int k, m; +struct U { U (); ~U (); int u; }; +struct V +{ + V () : v (8) {} + ~V () + { + if (v > 38 + 4 + 3 * 1024 + 1) + abort (); + } + V &operator= (const V &x) { v = x.v + 1; return *this; } + int v; +}; + +__attribute__((noinline, noclone)) +U::U () : u (6) +{ +} + +__attribute__((noinline, noclone)) +U::~U () +{ + if (u > 38 + 4 + 3 * 1023) + abort (); +} + +__attribute__((noinline, noclone)) int +foo (int *p) +{ + int i, s = 0; + U u; + V v; + #pragma omp simd aligned(a, p : 32) linear(k: m + 1) \ + reduction(+:s) lastprivate(u, v) + for (i = 0; i < 1024; i++) + { + a[i] *= p[i]; + u.u = p[i] + k; + k += m + 1; + v.v = p[i] + k; + s += p[i] + k; + } + if (u.u != 36 + 4 + 3 * 1023 || v.v != 36 + 4 + 3 * 1024 + 1) 
+ abort (); + return s; +} + +int +main () +{ +#if __SIZEOF_INT__ >= 4 + int i; + k = 4; + m = 2; + for (i = 0; i < 1024; i++) + { + a[i] = i - 512; + b[i] = (i - 51) % 39; + } + int s = foo (b); + for (i = 0; i < 1024; i++) + { + if (b[i] != (i - 51) % 39 + || a[i] != (i - 512) * b[i]) + abort (); + } + if (k != 4 + 3 * 1024 || s != 1596127) + abort (); +#endif + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/simd-2.C b/libgomp/testsuite/libgomp.c++/simd-2.C new file mode 100644 index 0000000..6b12415 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/simd-2.C @@ -0,0 +1,36 @@ +// { dg-do run } +// { dg-options "-O2" } +// { dg-additional-options "-msse2" { target sse2_runtime } } +// { dg-additional-options "-mavx" { target avx_runtime } } + +extern "C" void abort (); +__UINTPTR_TYPE__ arr[1027]; + +__attribute__((noinline, noclone)) void +foo () +{ + int i, v; + #pragma omp simd private (v) safelen(16) + for (i = 0; i < 1027; i++) + arr[i] = (__UINTPTR_TYPE__) &v; +} + +int +main () +{ + int i, j, cnt = 0; + __UINTPTR_TYPE__ arr2[16]; + foo (); + for (i = 0; i < 1027; i++) + { + for (j = 0; j < cnt; j++) + if (arr[i] == arr2[j]) + break; + if (j != cnt) + continue; + if (cnt == 16) + abort (); + arr2[cnt++] = arr[i]; + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/simd-3.C b/libgomp/testsuite/libgomp.c++/simd-3.C new file mode 100644 index 0000000..1c6d8e0 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/simd-3.C @@ -0,0 +1,131 @@ +// { dg-do run } +// { dg-options "-O2" } +// { dg-additional-options "-msse2" { target sse2_runtime } } +// { dg-additional-options "-mavx" { target avx_runtime } } + +extern "C" void abort (); +int a[1024] __attribute__((aligned (32))) = { 1 }; +int b[1024] __attribute__((aligned (32))) = { 1 }; +unsigned char c[1024] __attribute__((aligned (32))) = { 1 }; +int k, m; +__UINTPTR_TYPE__ u, u2, u3; + +__attribute__((noinline, noclone)) int +foo (int *p) +{ + int i, s = 0, s2 = 0, t, t2; + #pragma omp simd 
aligned(a, b, p : 32) linear(k: m + 1) reduction(+:s) \ + lastprivate (t2) + for (i = 0; i < 512; i++) + { + a[i] *= p[i]; + t2 = k + p[i]; + k += m + 1; + s += p[i] + k; + c[i]++; + } + #pragma omp simd aligned(a, b, p : 32) linear(k: m + 1) reduction(+:s2) \ + lastprivate (t, u, u2, u3) + for (i = 512; i < 1024; i++) + { + a[i] *= p[i]; + k += m + 1; + t = k + p[i]; + u = (__UINTPTR_TYPE__) &k; + u2 = (__UINTPTR_TYPE__) &s2; + u3 = (__UINTPTR_TYPE__) &t; + s2 += t; + c[i]++; + } + return s + s2 + t + t2; +} + +__attribute__((noinline, noclone)) long int +bar (int *p, long int n, long int o) +{ + long int i, s = 0, s2 = 0, t, t2; + #pragma omp simd aligned(a, b, p : 32) linear(k: m + 1) reduction(+:s) \ + lastprivate (t2) + for (i = 0; i < n; i++) + { + a[i] *= p[i]; + t2 = k + p[i]; + k += m + 1; + s += p[i] + k; + c[i]++; + } + #pragma omp simd aligned(a, b, p : 32) linear(k: m + 1) reduction(+:s2) \ + lastprivate (t, u, u2, u3) + for (i = n; i < o; i++) + { + a[i] *= p[i]; + k += m + 1; + t = k + p[i]; + u = (__UINTPTR_TYPE__) &k; + u2 = (__UINTPTR_TYPE__) &s2; + u3 = (__UINTPTR_TYPE__) &t; + s2 += t; + c[i]++; + } + return s + s2 + t + t2; +} + +int +main () +{ +#if __SIZEOF_INT__ >= 4 + int i; + k = 4; + m = 2; + for (i = 0; i < 1024; i++) + { + a[i] = i - 512; + b[i] = (i - 51) % 39; + c[i] = (unsigned char) i; + } + int s = foo (b); + for (i = 0; i < 1024; i++) + { + if (b[i] != (i - 51) % 39 + || a[i] != (i - 512) * b[i] + || c[i] != (unsigned char) (i + 1)) + abort (); + a[i] = i - 512; + } + if (k != 4 + 3 * 1024 + || s != 1596127 + (4 + 3 * 511 + b[511]) + (4 + 3 * 1024 + b[1023])) + abort (); + k = 4; + s = bar (b, 512, 1024); + for (i = 0; i < 1024; i++) + { + if (b[i] != (i - 51) % 39 + || a[i] != (i - 512) * b[i] + || c[i] != (unsigned char) (i + 2)) + abort (); + a[i] = i - 512; + } + if (k != 4 + 3 * 1024 + || s != 1596127 + (4 + 3 * 511 + b[511]) + (4 + 3 * 1024 + b[1023])) + abort (); + k = 4; + s = bar (b, 511, 1021); + for (i = 0; i < 1021; 
i++) + { + if (b[i] != (i - 51) % 39 + || a[i] != (i - 512) * b[i] + || c[i] != (unsigned char) (i + 3)) + abort (); + a[i] = i - 512; + } + for (i = 1021; i < 1024; i++) + if (b[i] != (i - 51) % 39 + || a[i] != i - 512 + || c[i] != (unsigned char) (i + 2)) + abort (); + if (k != 4 + 3 * 1021 + || s != 1586803 + (4 + 3 * 510 + b[510]) + (4 + 3 * 1021 + b[1020])) + abort (); +#endif + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/simd-4.C b/libgomp/testsuite/libgomp.c++/simd-4.C new file mode 100644 index 0000000..bdfacc6 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/simd-4.C @@ -0,0 +1,45 @@ +// { dg-do run } +// { dg-options "-O2" } +// { dg-additional-options "-msse2" { target sse2_runtime } } +// { dg-additional-options "-mavx" { target avx_runtime } } + +extern "C" void abort (); +int a[1024] __attribute__((aligned (32))) = { 1 }; +struct S +{ + int s; + S () : s (0) {} + ~S () {} +}; +#pragma omp declare reduction (+:S:omp_out.s += omp_in.s) +#pragma omp declare reduction (foo:S:omp_out.s += omp_in.s) +#pragma omp declare reduction (foo:int:omp_out += omp_in) + +__attribute__((noinline, noclone)) int +foo () +{ + int i, u = 0; + S s, t; + #pragma omp simd aligned(a : 32) reduction(+:s) reduction(foo:t, u) + for (i = 0; i < 1024; i++) + { + int x = a[i]; + s.s += x; + t.s += x; + u += x; + } + if (t.s != s.s || u != s.s) + abort (); + return s.s; +} + +int +main () +{ + int i; + for (i = 0; i < 1024; i++) + a[i] = (i & 31) + (i / 128); + int s = foo (); + if (s != 19456) + abort (); +} diff --git a/libgomp/testsuite/libgomp.c++/simd-5.C b/libgomp/testsuite/libgomp.c++/simd-5.C new file mode 100644 index 0000000..6c4627e --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/simd-5.C @@ -0,0 +1,47 @@ +// { dg-do run } +// { dg-options "-O2" } +// { dg-additional-options "-msse2" { target sse2_runtime } } +// { dg-additional-options "-mavx" { target avx_runtime } } + +extern "C" void abort (); +int a[1024] __attribute__((aligned (32))) = { 1 }; +struct S 
+{ + int s; + S () : s (0) {} + ~S () {} +}; +#pragma omp declare reduction (+:S:omp_out.s += omp_in.s) +#pragma omp declare reduction (foo:S:omp_out.s += omp_in.s) +#pragma omp declare reduction (foo:int:omp_out += omp_in) + +__attribute__((noinline, noclone)) int +foo () +{ + int i, u = 0, q = 0; + S s, t; + #pragma omp simd aligned(a : 32) reduction(+:s, q) reduction(foo:t, u) \ + safelen(1) + for (i = 0; i < 1024; i++) + { + int x = a[i]; + s.s += x; + t.s += x; + u += x; + q++; + } + if (t.s != s.s || u != s.s || q != 1024) + abort (); + return s.s; +} + +int +main () +{ + int i; + for (i = 0; i < 1024; i++) + a[i] = (i & 31) + (i / 128); + int s = foo (); + if (s != 19456) + abort (); +} diff --git a/libgomp/testsuite/libgomp.c++/simd-6.C b/libgomp/testsuite/libgomp.c++/simd-6.C new file mode 100644 index 0000000..cfc13d0 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/simd-6.C @@ -0,0 +1,70 @@ +// { dg-do run } +// { dg-options "-O2" } +// { dg-additional-options "-msse2" { target sse2_runtime } } +// { dg-additional-options "-mavx" { target avx_runtime } } + +extern "C" void abort (); +int a[1024] __attribute__((aligned (32))) = { 1 }; +struct S +{ + int s; + S () : s (0) {} + S (int x) : s (x) {} + ~S () {} +}; +#pragma omp declare reduction (+:S:omp_out.s += omp_in.s) \ + initializer (omp_priv (0)) +#pragma omp declare reduction (foo:S:omp_out.s += omp_in.s) \ + initializer (omp_priv (0)) +#pragma omp declare reduction (foo:int:omp_out += omp_in) \ + initializer (omp_priv = 0) + +__attribute__((noinline, noclone)) S +foo (S s) +{ + int i, v = 0, &u = v; + S t; + #pragma omp simd aligned(a : 32) reduction(+:s) reduction(foo:t, u) + for (i = 0; i < 1024; i++) + { + int x = a[i]; + s.s += x; + t.s += x; + u += x; + } + if (t.s != s.s || u != s.s) + abort (); + return t; +} + +__attribute__((noinline, noclone)) int +bar (S &s, S &t) +{ + int i, v = 0, &u = v; + #pragma omp simd aligned(a : 32) reduction(+:s) reduction(foo:t, u) + for (i = 0; i < 1024; i++) 
+ { + int x = a[i]; + s.s += x; + t.s += x; + u += x; + } + if (t.s != s.s || u != s.s) + abort (); + return s.s; +} + +int +main () +{ + int i; + for (i = 0; i < 1024; i++) + a[i] = (i & 31) + (i / 128); + S q; + int s = foo (q).s; + if (s != 19456) + abort (); + S r, v; + if (bar (r, v) != s) + abort (); +} diff --git a/libgomp/testsuite/libgomp.c++/simd-7.C b/libgomp/testsuite/libgomp.c++/simd-7.C new file mode 100644 index 0000000..5a6f4ce --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/simd-7.C @@ -0,0 +1,72 @@ +// { dg-do run } +// { dg-options "-O2" } +// { dg-additional-options "-msse2" { target sse2_runtime } } +// { dg-additional-options "-mavx" { target avx_runtime } } + +extern "C" void abort (); +int a[1024] __attribute__((aligned (32))) = { 1 }; +struct S +{ + int s; + S () : s (0) {} + S (int x) : s (x) {} + ~S () {} +}; +#pragma omp declare reduction (+:S:omp_out.s += omp_in.s) \ + initializer (omp_priv (0)) +#pragma omp declare reduction (foo:S:omp_out.s += omp_in.s) \ + initializer (omp_priv (0)) +#pragma omp declare reduction (foo:int:omp_out += omp_in) \ + initializer (omp_priv = 0) + +__attribute__((noinline, noclone)) S +foo (S s) +{ + int i, v = 0, &u = v; + S t; + #pragma omp simd aligned(a : 32) reduction(+:s) reduction(foo:t, u) \ + safelen(1) + for (i = 0; i < 1024; i++) + { + int x = a[i]; + s.s += x; + t.s += x; + u += x; + } + if (t.s != s.s || u != s.s) + abort (); + return t; +} + +__attribute__((noinline, noclone)) int +bar (S &s, S &t) +{ + int i, v = 0, &u = v; + #pragma omp simd aligned(a : 32) reduction(+:s) reduction(foo:t, u) \ + safelen(1) + for (i = 0; i < 1024; i++) + { + int x = a[i]; + s.s += x; + t.s += x; + u += x; + } + if (t.s != s.s || u != s.s) + abort (); + return s.s; +} + +int +main () +{ + int i; + for (i = 0; i < 1024; i++) + a[i] = (i & 31) + (i / 128); + S q; + int s = foo (q).s; + if (s != 19456) + abort (); + S r, v; + if (bar (r, v) != s) + abort (); +} diff --git a/libgomp/testsuite/libgomp.c++/simd-8.C 
b/libgomp/testsuite/libgomp.c++/simd-8.C new file mode 100644 index 0000000..7c75832 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/simd-8.C @@ -0,0 +1,47 @@ +// PR libgomp/58482 +// { dg-do run } +// { dg-options "-O2" } +// { dg-additional-options "-msse2" { target sse2_runtime } } +// { dg-additional-options "-mavx" { target avx_runtime } } + +extern "C" void abort (); +int a[1024] __attribute__((aligned (32))) = { 1 }; +struct S +{ + int s; + S () : s (0) {} + ~S () {} +}; +#pragma omp declare reduction (+:S:omp_out.s += omp_in.s) +#pragma omp declare reduction (foo:S:omp_out.s += omp_in.s) +#pragma omp declare reduction (foo:int:omp_out += omp_in) + +__attribute__((noinline, noclone)) int +foo () +{ + int i, u = 0; + S s, t; + #pragma omp parallel for simd aligned(a : 32) reduction(+:s) \ + reduction(foo:t, u) + for (i = 0; i < 1024; i++) + { + int x = a[i]; + s.s += x; + t.s += x; + u += x; + } + if (t.s != s.s || u != s.s) + abort (); + return s.s; +} + +int +main () +{ + int i; + for (i = 0; i < 1024; i++) + a[i] = (i & 31) + (i / 128); + int s = foo (); + if (s != 19456) + abort (); +} diff --git a/libgomp/testsuite/libgomp.c++/target-1.C b/libgomp/testsuite/libgomp.c++/target-1.C new file mode 100644 index 0000000..3cf01d9 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-1.C @@ -0,0 +1 @@ +#include "../libgomp.c/target-1.c" diff --git a/libgomp/testsuite/libgomp.c++/target-2-aux.cc b/libgomp/testsuite/libgomp.c++/target-2-aux.cc new file mode 100644 index 0000000..b405404 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-2-aux.cc @@ -0,0 +1,5 @@ +double f[1024]; +double (&fr) [1024] = f; +double gbuf[1024]; +double *g = gbuf; +double *&gr = g; diff --git a/libgomp/testsuite/libgomp.c++/target-2.C b/libgomp/testsuite/libgomp.c++/target-2.C new file mode 100644 index 0000000..83207cd --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-2.C @@ -0,0 +1,58 @@ +// { dg-options "-O2 -fopenmp" } +// { dg-additional-sources "target-2-aux.cc" 
} + +extern "C" void abort (void); + +void +fn1 (double *x, double *y, int z) +{ + int i; + for (i = 0; i < z; i++) + { + x[i] = i & 31; + y[i] = (i & 63) - 30; + } +} + +double b[1024]; +double (&br) [1024] = b; +double cbuf[1024]; +double *c = cbuf; +double *&cr = c; +extern double (&fr) [1024]; +extern double *&gr; + +double +fn2 (int x, double (&dr) [1024], double *&er) +{ + double s = 0; + double h[1024]; + double (&hr) [1024] = h; + double ibuf[1024]; + double *i = ibuf; + double *&ir = i; + int j; + fn1 (hr + 2 * x, ir + 2 * x, x); + #pragma omp target map(to: br[:x], cr[0:x], dr[x:x], er[x:x]) \ + map(to: fr[0:x], gr[0:x], hr[2 * x:x], ir[2 * x:x]) + #pragma omp parallel for reduction(+:s) + for (j = 0; j < x; j++) + s += br[j] * cr[j] + dr[x + j] + er[x + j] + + fr[j] + gr[j] + hr[2 * x + j] + ir[2 * x + j]; + return s; +} + +int +main () +{ + double d[1024]; + double ebuf[1024]; + double *e = ebuf; + fn1 (br, cr, 128); + fn1 (d + 128, e + 128, 128); + fn1 (fr, gr, 128); + double h = fn2 (128, d, e); + if (h != 20416.0) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/target-3.C b/libgomp/testsuite/libgomp.c++/target-3.C new file mode 100644 index 0000000..2e4586f --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-3.C @@ -0,0 +1 @@ +#include "../libgomp.c/target-2.c" diff --git a/libgomp/testsuite/libgomp.c++/taskgroup-1.C b/libgomp/testsuite/libgomp.c++/taskgroup-1.C new file mode 100644 index 0000000..5129896 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/taskgroup-1.C @@ -0,0 +1 @@ +#include "../libgomp.c/taskgroup-1.c" diff --git a/libgomp/testsuite/libgomp.c++/udr-1.C b/libgomp/testsuite/libgomp.c++/udr-1.C new file mode 100644 index 0000000..c3ddd85 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/udr-1.C @@ -0,0 +1,82 @@ +// { dg-do run } + +extern "C" void abort (); + +struct S +{ + int s; + void foo (S &x) { s += x.s; } + void foo (S &x, bool y) { s += x.s; if (y) abort (); } + S (const S &x) { s = x.s + 1; } + S 
(const S &x, bool y) { s = x.s + 2; if (y) abort (); } + S () { s = 6; } + ~S (); +}; + +S::~S () +{ + if (s < 6) abort (); + s = -1; + /* Ensure the above store is not DSEd. */ + asm volatile ("" : : "r" (&s) : "memory"); +} + +void +bar (S &x) +{ + if (x.s != 6) abort (); + x.s = 15; +} + +#pragma omp declare reduction (foo: S: omp_out.foo (omp_in)) \ + initializer (omp_priv (omp_orig, false)) +#pragma omp declare reduction (foo: char, int, short: omp_out += omp_in - 4) \ + initializer (omp_priv (4)) +#pragma omp declare reduction (+: S: omp_out.foo (omp_in, false)) \ + initializer (omp_priv (omp_orig)) + +namespace N +{ + #pragma omp declare reduction (foo: S: omp_out.foo (omp_in)) \ + initializer (::bar (omp_priv)) + namespace M {} +} + +int +main () +{ + S a, b, c, s, t, u; + if (a.s != 6 || b.s != 6 || c.s != 6 + || s.s != 6 || t.s != 6 || u.s != 6) abort (); + s.s = 9; t.s = 10; u.s = 11; + int d = 0, e = 0, f = 0, g = 0, h = 30, v = 2, q = 0; + #pragma omp declare reduction (foo: S: omp_out.foo (omp_in, true)) \ + initializer (omp_priv = omp_orig) + { + #pragma omp declare reduction (foo: S: omp_out.foo (omp_in, false)) \ + initializer (omp_priv = omp_orig) + #pragma omp parallel num_threads (4) reduction (N::operator +: q) \ + reduction (operator +: a, d) reduction (::operator +: b, e) \ + reduction (+: c, f) reduction (::N::M::operator +: g) \ + reduction (::N::min: h) reduction (foo: s) reduction (N::foo: t) \ + reduction (::foo: u) reduction (::foo: v) + { + if (a.s != 7 || b.s != 7 || c.s != 7 + || s.s != 10 || t.s != 15 || u.s != 13 + || v != 4 || d || e || f || g || h != __INT_MAX__) abort (); + asm volatile ("" : "+m" (a.s), "+m" (b.s)); + asm volatile ("" : "+m" (c.s), "+r" (d)); + asm volatile ("" : "+r" (e), "+r" (f)); + asm volatile ("" : "+r" (g), "+r" (h)); + asm volatile ("" : "+m" (s.s), "+m" (t.s)); + asm volatile ("" : "+m" (u.s), "+r" (v)); + a.s++; b.s++; c.s++; d++; e++; f++; g++; h = t.s; + s.s++; t.s++; u.s++; v++; q++; + } + } + if 
(a.s != 6 + q * 8 || b.s != 6 + q * 8 || c.s != 6 + q * 8 + || d != q || e != q || f != q || g != q || h != 15 + || s.s != 9 + q * 11 || t.s != 10 + q * 16 || u.s != 11 + q * 14 + || v != 2 + q) + abort (); +} diff --git a/libgomp/testsuite/libgomp.c++/udr-2.C b/libgomp/testsuite/libgomp.c++/udr-2.C new file mode 100644 index 0000000..5408be1 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/udr-2.C @@ -0,0 +1,88 @@ +// { dg-do run } + +extern "C" void abort (); + +namespace NS +{ + struct U + { + void foo (U &, bool); + U (); + }; + struct S + { + int s; + #pragma omp declare reduction (foo : U, S : omp_out.foo (omp_in, false)) + #pragma omp declare reduction (foo : int : omp_out += omp_in) \ + initializer (omp_priv = int ()) + void baz (int v) + { + S s; + int q = 0; + if (s.s != 6 || v != 0) abort (); + s.s = 20; + #pragma omp parallel num_threads (4) reduction (foo : s, v) \ + reduction (::NS::U::operator + : q) + { + if (s.s != 6 || q != 0 || v != 0) abort (); + asm volatile ("" : "+m" (s.s), "+r" (q), "+r" (v)); + s.s++; q++; v++; + } + if (s.s != 20 + q * 7 || q != v) abort (); + } + void foo (S &x) { s += x.s; } + void foo (S &x, bool y) { s += x.s; if (y) abort (); } + S (const S &x) { s = x.s + 1; } + S (const S &x, bool y) { s = x.s + 2; if (y) abort (); } + S () { s = 6; } + S (int x) { s = x; } + ~S (); + }; + #pragma omp declare reduction (bar : S : omp_out.foo (omp_in)) \ + initializer (omp_priv (8)) +} + +NS::S::~S () +{ + if (s < 6) abort (); + s = -1; + /* Ensure the above store is not DSEd. 
*/ + asm volatile ("" : : "r" (&s) : "memory"); +} + +struct T : public NS::S +{ + void baz () + { + S s; + int q = 0; + if (s.s != 6) abort (); + #pragma omp parallel num_threads (4) reduction (foo:s) \ + reduction (+: q) + { + if (s.s != 6 || q != 0) abort (); + asm volatile ("" : "+m" (s.s), "+r" (q)); + s.s += 2; q++; + } + if (s.s != 6 + q * 8) abort (); + } +}; + +int +main () +{ + NS::S s; + s.baz (0); + T t; + t.baz (); + int q = 0; + if (s.s != 6) abort (); + // Test ADL + #pragma omp parallel num_threads (4) reduction (bar:s) reduction (+:q) + { + if (s.s != 8 || q != 0) abort (); + asm volatile ("" : "+m" (s.s), "+r" (q)); + s.s += 4; q++; + } + if (s.s != 6 + q * 12) abort (); +} diff --git a/libgomp/testsuite/libgomp.c++/udr-3.C b/libgomp/testsuite/libgomp.c++/udr-3.C new file mode 100644 index 0000000..74a0138 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/udr-3.C @@ -0,0 +1,149 @@ +// { dg-do run } + +extern "C" void abort (); + +void +dblinit (double *p) +{ + *p = 2.0; +} + +namespace NS +{ + template <int N> + struct U + { + void foo (U &, bool); + U (); + }; + template <int N> + struct S + { + int s; + #pragma omp declare reduction (foo : U<0>, S : omp_out.foo (omp_in, false)) + #pragma omp declare reduction (foo : int : omp_out += omp_in) \ + initializer (omp_priv = N + 2) + #pragma omp declare reduction (foo : double : omp_out += omp_in) \ + initializer (dblinit (&omp_priv)) + void baz (int v) + { + S s; + int q = 0; + if (s.s != 6 || v != 0) abort (); + s.s = 20; + double d = 4.0; + #pragma omp parallel num_threads (4) reduction (foo : s, v, d) \ + reduction (::NS::U<N>::operator + : q) + { + if (s.s != 6 || q != 0 || v != N + 2 || d != 2.0) abort (); + asm volatile ("" : "+m" (s.s), "+r" (q), "+r" (v)); + s.s++; q++; v++; + } + if (s.s != 20 + q * 7 || (N + 3) * q != v || d != 4.0 + 2.0 * q) + abort (); + } + void foo (S &x) { s += x.s; } + void foo (S &x, bool y) { s += x.s; if (y) abort (); } + S (const S &x) { s = x.s + 1; } + S (const S 
&x, bool y) { s = x.s + 2; if (y) abort (); } + S () { s = 6; } + S (int x) { s = x; } + ~S (); + }; + #pragma omp declare reduction (bar : S<1> : omp_out.foo (omp_in)) \ + initializer (omp_priv (8)) +} + +template <int N> +NS::S<N>::~S () +{ + if (s < 6) abort (); + s = -1; + /* Ensure the above store is not DSEd. */ + asm volatile ("" : : "r" (&s) : "memory"); +} + +template <int N> +struct T : public NS::S<N> +{ + void baz () + { + NS::S<N> s; + int q = 0; + if (s.s != 6) abort (); + #pragma omp parallel num_threads (4) reduction (foo:s) \ + reduction (+: q) + { + if (s.s != 6 || q != 0) abort (); + asm volatile ("" : "+m" (s.s), "+r" (q)); + s.s += 2; q++; + } + if (s.s != 6 + q * 8) abort (); + } +}; + +struct W +{ + int v; + W () : v (6) {} + ~W () {} +}; + +template <typename T, typename D> +struct V +{ + #pragma omp declare reduction (baz: T: omp_out.s += omp_in.s) \ + initializer (omp_priv (11)) + #pragma omp declare reduction (baz: D: omp_out += omp_in) \ + initializer (dblinit (&omp_priv)) + static void dblinit (D *x) { *x = 3.0; } + void baz () + { + T t; + V v; + int q = 0; + D d = 4.0; + if (t.s != 6 || v.v != 4) abort (); + #pragma omp declare reduction (+ : V, W : omp_out.v -= omp_in.v) \ + initializer (omp_priv (12)) + { + #pragma omp declare reduction (+ : W, V : omp_out.v += omp_in.v) \ + initializer (omp_priv (9)) + #pragma omp parallel num_threads (4) reduction (+: v, q) \ + reduction (baz: t, d) + { + if (t.s != 11 || v.v != 9 || q != 0 || d != 3.0) abort (); + asm volatile ("" : "+m" (t.s), "+m" (v.v), "+r" (q)); + t.s += 2; v.v += 3; q++; + } + if (t.s != 6 + 13 * q || v.v != 4 + 12 * q || d != 4.0 + 3.0 * q) + abort (); + } + } + int v; + V () : v (4) {} + V (int x) : v (x) {} + ~V () {} +}; + +int +main () +{ + NS::S<0> u; + u.baz (0); + T<2> t; + t.baz (); + NS::S<1> s; + int q = 0; + if (s.s != 6) abort (); + // Test ADL + #pragma omp parallel num_threads (4) reduction (bar:s) reduction (+:q) + { + if (s.s != 8 || q != 0) abort (); + asm 
volatile ("" : "+m" (s.s), "+r" (q)); + s.s += 4; q++; + } + if (s.s != 6 + q * 12) abort (); + V <NS::S <0>, double> v; + v.baz (); +} diff --git a/libgomp/testsuite/libgomp.c++/udr-4.C b/libgomp/testsuite/libgomp.c++/udr-4.C new file mode 100644 index 0000000..3e71746 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/udr-4.C @@ -0,0 +1,32 @@ +// { dg-do run } + +extern "C" void abort (); + +struct S +{ + int s; + S () : s (0) {} + ~S () {} +}; + +#pragma omp declare reduction (+:S:omp_out.s += omp_in.s) +#pragma omp declare reduction (foo:S:omp_out.s += omp_in.s) +#pragma omp declare reduction (foo:int:omp_out += omp_in) + +int +main () +{ + int i, u = 0, q = 0; + S s, t; + if (s.s != 0 || t.s != 0) abort (); + #pragma omp parallel reduction(+:s, q) reduction(foo:t, u) + { + if (s.s != 0 || t.s != 0 || u != 0 || q != 0) abort (); + s.s = 6; + t.s = 8; + u = 9; + q++; + } + if (s.s != 6 * q || t.s != 8 * q || u != 9 * q) abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/udr-5.C b/libgomp/testsuite/libgomp.c++/udr-5.C new file mode 100644 index 0000000..91ae2f6 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/udr-5.C @@ -0,0 +1,49 @@ +// { dg-do run } + +extern "C" void abort (); + +struct S +{ + void foo () + { + S s; + int j = 0; + #pragma omp declare reduction (bar : int : omp_out += omp_in) + #pragma omp parallel reduction (bar : s) reduction(S::operator+ : j) + s.a = 4, j = 1; + if (s.a != 4 * j) abort (); + } + #pragma omp declare reduction (bar : S : baz (omp_out, omp_in)) + static void baz (S &x, S &y) { x.a += y.a; } + S () : a (0) {} + int a; +}; + +template <int N> +struct T +{ + void foo () + { + S s; + T t; + int j = 0; + #pragma omp declare reduction (bar : int : omp_out += omp_in) + #pragma omp parallel reduction (bar : t) reduction (S::bar : s) \ + reduction(T<N>::operator+ : j) + s.a = 4, t.a = 5, j = 1; + if (s.a != 4 * j || t.a != 5 * j) abort (); + } + #pragma omp declare reduction (bar : T<N> : baz (omp_out, omp_in)) + static 
void baz (T &x, T &y) { x.a += y.a; } + T () : a (N) {} + int a; +}; + +int +main () +{ + S s; + s.foo (); + T<0> t; + t.foo (); +} diff --git a/libgomp/testsuite/libgomp.c++/udr-6.C b/libgomp/testsuite/libgomp.c++/udr-6.C new file mode 100644 index 0000000..4be821e --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/udr-6.C @@ -0,0 +1,68 @@ +// { dg-do run } + +extern "C" void abort (); + +struct A { int a; A () : a (6) {} }; +struct B { int b; B () : b (5) {} }; +struct C { int c; C () : c (4) {} }; +struct D { int d; D () : d (3) {} }; +struct E : A, B {}; +struct F : C, D {}; +struct G : E, F {}; +void foo (B &); +void foo (F &); +#pragma omp declare reduction (+:B:omp_out.b += omp_in.b) \ + initializer(foo (omp_priv)) + +void +foo (B &x) +{ + if (x.b != 5) + abort (); + x.b = 9; +} + +template <typename T> +void bar (T &x, T &y, int z) +{ + if (z) + abort (); + x.a += y.a; +} + +namespace N1 +{ + struct A { int a; A () : a (0) {} }; + #pragma omp declare reduction (+:A:bar (omp_out, omp_in, 0)) +}; +namespace N2 +{ + struct B : N1::A { }; + #pragma omp declare reduction (+:N1::A:bar (omp_out, omp_in, 1)) +}; + +int +main () +{ + G g; + int i = 0; + #pragma omp parallel reduction(+:g, i) + { + if (g.a != 6 || g.b != 9 || g.c != 4 || g.d != 3) + abort (); + g.a = 1, g.b = 2, g.c = 3, g.d = 4, i = 1; + } + if (g.a != 6 || g.b != 5 + 2 * i || g.c != 4 || g.d != 3) + abort (); + N2::B b; + i = 0; + #pragma omp parallel reduction (+:b, i) + { + if (b.a != 0) + abort (); + b.a = 4; + i = 1; + } + if (b.a != 4 * i) + abort (); +} diff --git a/libgomp/testsuite/libgomp.c++/udr-7.C b/libgomp/testsuite/libgomp.c++/udr-7.C new file mode 100644 index 0000000..6f66189 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/udr-7.C @@ -0,0 +1,72 @@ +// { dg-do run } + +extern "C" void abort (); + +struct S +{ + int s; + void foo (S &x) { s += x.s; } + S (const S &x) { s = x.s + 1; } + S () { s = 6; } + ~S () {} +}; + +void +bar (S &x, S &y) +{ + if (x.s != 6 || y.s != 6) + abort (); 
+ x.s = 8; +} + +#pragma omp declare reduction (foo: S: omp_out.foo (omp_in)) \ + initializer (omp_priv (omp_orig)) +#pragma omp declare reduction (bar : S: omp_out.foo (omp_in)) \ + initializer (bar (omp_priv, omp_orig)) + +S +baz (S x) +{ + S r; + int i = 0; + if (x.s != 7 || r.s != 6) + abort (); + #pragma omp parallel reduction (foo: x) reduction (bar: r) \ + reduction (+: i) + { + if (x.s != 8 || r.s != 8) + abort (); + x.s = 12; + r.s = 14; + i = 1; + } + if (x.s != 7 + 12 * i || r.s != 6 + 14 * i) + abort (); + return r; +} + +void +baz (S &x, S &y) +{ + int i = 0, &j = i; + #pragma omp parallel reduction (foo: x) reduction (bar: y) \ + reduction (+: i) + { + if (x.s != 7 || y.s != 8) + abort (); + x.s = 12; + y.s = 14; + i = 1; + } + if (x.s != 6 + 12 * j || y.s != 6 + 14 * j) + abort (); +} + +int +main () +{ + S s; + baz (s); + S t, u; + baz (t, u); +} diff --git a/libgomp/testsuite/libgomp.c++/udr-8.C b/libgomp/testsuite/libgomp.c++/udr-8.C new file mode 100644 index 0000000..81c4beb --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/udr-8.C @@ -0,0 +1,39 @@ +// { dg-do run } + +extern "C" void abort (); + +struct S; +void foo (S *, S *); +void bar (S &, S &); +#pragma omp declare reduction (+:S:foo (&omp_out, &omp_in)) +#pragma omp declare reduction (*:S:bar (omp_out, omp_in)) +struct S { int s; S () : s (0) {} }; + +void +foo (S *x, S *y) +{ + x->s += y->s; +} + +void +bar (S &x, S &y) +{ + x.s += y.s; +} + +int +main () +{ + S s, t; + int i = 0; + #pragma omp parallel reduction (+:s, i) reduction (*:t) + { + if (s.s != 0 || t.s != 0) + abort (); + s.s = 2; + t.s = 3; + i = 1; + } + if (s.s != 2 * i || t.s != 3 * i) + abort (); +} diff --git a/libgomp/testsuite/libgomp.c++/udr-9.C b/libgomp/testsuite/libgomp.c++/udr-9.C new file mode 100644 index 0000000..58fea18 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/udr-9.C @@ -0,0 +1,3 @@ +// { dg-do run } + +#include "../libgomp.c/udr-1.c" diff --git a/libgomp/testsuite/libgomp.c/affinity-1.c 
b/libgomp/testsuite/libgomp.c/affinity-1.c new file mode 100644 index 0000000..5d3e45d --- /dev/null +++ b/libgomp/testsuite/libgomp.c/affinity-1.c @@ -0,0 +1,1146 @@ +/* Affinity tests. + Copyright (C) 2013 Free Software Foundation, Inc. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +/* { dg-do run } */ +/* { dg-set-target-env-var OMP_PROC_BIND "false" } */ +/* { dg-additional-options "-DINTERPOSE_GETAFFINITY -DDO_FORK -ldl" { target *-*-linux* } } */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include "config.h" +#include <alloca.h> +#include <omp.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#ifdef DO_FORK +#include <signal.h> +#endif +#ifdef HAVE_PTHREAD_AFFINITY_NP +#include <sched.h> +#include <pthread.h> +#ifdef INTERPOSE_GETAFFINITY +#include <dlfcn.h> +#endif +#endif + +struct place +{ + int start, len; +}; +struct places +{ + char name[40]; + int count; + struct place places[8]; +} places_array[] = { + { "", 1, { { -1, -1 } } }, + { "{0}:8", 8, + { { 0, 1 }, { 1, 1 }, { 2, 1 }, { 3, 1 }, + { 4, 1 }, { 5, 1 }, { 6, 1 }, { 7, 1 } } }, + { "{7,6}:2:-3", 2, { { 6, 2 }, { 3, 2 } } }, + { "{6,7}:4:-2,!{2,3}", 3, { { 6, 2 }, { 4, 2 }, { 0, 2 } } }, + { "{1}:7:1", 7, + { { 1, 1 }, { 2, 1 }, { 3, 1 }, + { 4, 1 }, { 5, 1 }, { 6, 1 }, { 7, 1 } } }, + { "{0,1},{3,2,4},{6,5,!6},{6},{7:2:-1,!6}", 5, + { { 0, 2 }, { 2, 3 }, { 5, 1 }, { 6, 1 }, { 7, 1 } } 
} +}; + +unsigned long contig_cpucount; +unsigned long min_cpusetsize; + +#if defined (HAVE_PTHREAD_AFFINITY_NP) && defined (_SC_NPROCESSORS_CONF) \ + && defined (CPU_ALLOC_SIZE) + +#if defined (RTLD_NEXT) && defined (INTERPOSE_GETAFFINITY) +int (*orig_getaffinity_np) (pthread_t, size_t, cpu_set_t *); + +int +pthread_getaffinity_np (pthread_t thread, size_t cpusetsize, cpu_set_t *cpuset) +{ + int ret; + unsigned long i, max; + if (orig_getaffinity_np == NULL) + { + orig_getaffinity_np = (int (*) (pthread_t, size_t, cpu_set_t *)) + dlsym (RTLD_NEXT, "pthread_getaffinity_np"); + if (orig_getaffinity_np == NULL) + exit (0); + } + ret = orig_getaffinity_np (thread, cpusetsize, cpuset); + if (ret != 0) + return ret; + if (contig_cpucount == 0) + { + max = 8 * cpusetsize; + for (i = 0; i < max; i++) + if (!CPU_ISSET_S (i, cpusetsize, cpuset)) + break; + contig_cpucount = i; + min_cpusetsize = cpusetsize; + } + return ret; +} +#endif + +void +print_affinity (struct place p) +{ + static unsigned long size; + if (size == 0) + { + if (min_cpusetsize) + size = min_cpusetsize; + else + { + size = sysconf (_SC_NPROCESSORS_CONF); + size = CPU_ALLOC_SIZE (size); + if (size < sizeof (cpu_set_t)) + size = sizeof (cpu_set_t); + } + } + cpu_set_t *cpusetp = (cpu_set_t *) alloca (size); + if (pthread_getaffinity_np (pthread_self (), size, cpusetp) == 0) + { + unsigned long i, len, max = 8 * size; + int notfirst = 0, unexpected = 1; + + printf (" bound to {"); + for (i = 0, len = 0; i < max; i++) + if (CPU_ISSET_S (i, size, cpusetp)) + { + if (len == 0) + { + if (notfirst) + { + unexpected = 1; + printf (","); + } + else if (i == (unsigned long) p.start) + unexpected = 0; + notfirst = 1; + printf ("%lu", i); + } + ++len; + } + else + { + if (len && len != (unsigned long) p.len) + unexpected = 1; + if (len > 1) + printf (":%lu", len); + len = 0; + } + if (len && len != (unsigned long) p.len) + unexpected = 1; + if (len > 1) + printf (":%lu", len); + printf ("}"); + if (p.start != -1 && 
unexpected) + { + printf (", expected {%d", p.start); + if (p.len != 1) + printf (":%d", p.len); + printf ("} instead"); + } + else if (p.start != -1) + printf (", verified"); + } +} +#else +void +print_affinity (struct place p) +{ + (void) p.start; + (void) p.len; +} +#endif + + +int +main () +{ + char *env_proc_bind = getenv ("OMP_PROC_BIND"); + int test_false = env_proc_bind && strcmp (env_proc_bind, "false") == 0; + int test_true = env_proc_bind && strcmp (env_proc_bind, "true") == 0; + int test_spread_master_close + = env_proc_bind && strcmp (env_proc_bind, "spread,master,close") == 0; + char *env_places = getenv ("OMP_PLACES"); + int test_places = 0; + +#ifdef DO_FORK + if (env_places == NULL && contig_cpucount >= 8 && test_false + && getenv ("GOMP_AFFINITY") == NULL) + { + int i, j, status; + pid_t pid; + for (j = 0; j < 2; j++) + { + if (setenv ("OMP_PROC_BIND", j ? "spread,master,close" : "true", 1) + < 0) + break; + for (i = sizeof (places_array) / sizeof (places_array[0]) - 1; + i; --i) + { + if (setenv ("OMP_PLACES", places_array[i].name, 1) < 0) + break; + pid = fork (); + if (pid == -1) + break; + if (pid == 0) + { + execl ("/proc/self/exe", "affinity-1.exe", NULL); + _exit (1); + } + if (waitpid (pid, &status, 0) < 0) + break; + if (WIFSIGNALED (status) && WTERMSIG (status) == SIGABRT) + abort (); + else if (!WIFEXITED (status) || WEXITSTATUS (status) != 0) + break; + } + if (i) + break; + } + } +#endif + + int first = 1; + if (env_proc_bind) + { + printf ("OMP_PROC_BIND='%s'", env_proc_bind); + first = 0; + } + if (env_places) + printf ("%sOMP_PLACES='%s'", first ? 
"" : " ", env_places); + printf ("\n"); + + if (env_places && contig_cpucount >= 8 + && (test_true || test_spread_master_close)) + { + for (test_places = sizeof (places_array) / sizeof (places_array[0]) - 1; + test_places; --test_places) + if (strcmp (env_places, places_array[test_places].name) == 0) + break; + } + +#define verify(if_true, if_s_m_c) \ + if (test_false && omp_get_proc_bind () != omp_proc_bind_false) \ + abort (); \ + if (test_true && omp_get_proc_bind () != if_true) \ + abort (); \ + if (test_spread_master_close && omp_get_proc_bind () != if_s_m_c) \ + abort (); + + verify (omp_proc_bind_true, omp_proc_bind_spread); + + printf ("Initial thread"); + print_affinity (places_array[test_places].places[0]); + printf ("\n"); + omp_set_nested (1); + omp_set_dynamic (0); + + #pragma omp parallel if (0) + { + verify (omp_proc_bind_true, omp_proc_bind_master); + #pragma omp parallel if (0) + { + verify (omp_proc_bind_true, omp_proc_bind_close); + #pragma omp parallel if (0) + { + verify (omp_proc_bind_true, omp_proc_bind_close); + } + #pragma omp parallel if (0) proc_bind (spread) + { + verify (omp_proc_bind_spread, omp_proc_bind_spread); + } + } + #pragma omp parallel if (0) proc_bind (master) + { + verify (omp_proc_bind_master, omp_proc_bind_close); + #pragma omp parallel if (0) + { + verify (omp_proc_bind_master, omp_proc_bind_close); + } + #pragma omp parallel if (0) proc_bind (spread) + { + verify (omp_proc_bind_spread, omp_proc_bind_spread); + } + } + } + + /* True/spread */ + #pragma omp parallel num_threads (4) + { + verify (omp_proc_bind_true, omp_proc_bind_master); + #pragma omp critical + { + struct place p = places_array[0].places[0]; + int thr = omp_get_thread_num (); + printf ("#1 thread %d", thr); + if (omp_get_num_threads () == 4 && test_spread_master_close) + switch (places_array[test_places].count) + { + case 8: + /* T = 4, P = 8, each subpartition has 2 places. 
*/ + case 7: + /* T = 4, P = 7, each subpartition has 2 places, but + last partition, which has just one place. */ + p = places_array[test_places].places[2 * thr]; + break; + case 5: + /* T = 4, P = 5, first subpartition has 2 places, the + rest just one. */ + p = places_array[test_places].places[thr ? 1 + thr : 0]; + break; + case 3: + /* T = 4, P = 3, unit sized subpartitions, first gets + thr0 and thr3, second thr1, third thr2. */ + p = places_array[test_places].places[thr == 3 ? 0 : thr]; + break; + case 2: + /* T = 4, P = 2, unit sized subpartitions, each with + 2 threads. */ + p = places_array[test_places].places[thr / 2]; + break; + } + print_affinity (p); + printf ("\n"); + } + #pragma omp barrier + if (omp_get_thread_num () == 3) + { + /* True/spread, true/master. */ + #pragma omp parallel num_threads (3) + { + verify (omp_proc_bind_true, omp_proc_bind_close); + #pragma omp critical + { + struct place p = places_array[0].places[0]; + int thr = omp_get_thread_num (); + printf ("#1,#1 thread 3,%d", thr); + if (omp_get_num_threads () == 3 && test_spread_master_close) + /* Outer is spread, inner master, so just bind to the + place or the master thread, which is thr 3 above. */ + switch (places_array[test_places].count) + { + case 8: + case 7: + p = places_array[test_places].places[6]; + break; + case 5: + p = places_array[test_places].places[4]; + break; + case 3: + p = places_array[test_places].places[0]; + break; + case 2: + p = places_array[test_places].places[1]; + break; + } + print_affinity (p); + printf ("\n"); + } + } + /* True/spread, spread. */ + #pragma omp parallel num_threads (5) proc_bind (spread) + { + verify (omp_proc_bind_spread, omp_proc_bind_close); + #pragma omp critical + { + struct place p = places_array[0].places[0]; + int thr = omp_get_thread_num (); + printf ("#1,#2 thread 3,%d", thr); + if (omp_get_num_threads () == 5 && test_spread_master_close) + /* Outer is spread, inner spread. 
*/ + switch (places_array[test_places].count) + { + case 8: + /* T = 5, P = 2, unit sized subpartitions. */ + p = places_array[test_places].places[thr == 4 ? 6 + : 6 + thr / 2]; + break; + /* The rest are T = 5, P = 1. */ + case 7: + p = places_array[test_places].places[6]; + break; + case 5: + p = places_array[test_places].places[4]; + break; + case 3: + p = places_array[test_places].places[0]; + break; + case 2: + p = places_array[test_places].places[1]; + break; + } + print_affinity (p); + printf ("\n"); + } + #pragma omp barrier + if (omp_get_thread_num () == 3) + { + /* True/spread, spread, close. */ + #pragma omp parallel num_threads (5) proc_bind (close) + { + verify (omp_proc_bind_close, omp_proc_bind_close); + #pragma omp critical + { + struct place p = places_array[0].places[0]; + int thr = omp_get_thread_num (); + printf ("#1,#2,#1 thread 3,3,%d", thr); + if (omp_get_num_threads () == 5 && test_spread_master_close) + /* Outer is spread, inner spread, innermost close. */ + switch (places_array[test_places].count) + { + /* All are T = 5, P = 1. */ + case 8: + p = places_array[test_places].places[7]; + break; + case 7: + p = places_array[test_places].places[6]; + break; + case 5: + p = places_array[test_places].places[4]; + break; + case 3: + p = places_array[test_places].places[0]; + break; + case 2: + p = places_array[test_places].places[1]; + break; + } + print_affinity (p); + printf ("\n"); + } + } + } + } + /* True/spread, master. */ + #pragma omp parallel num_threads (4) proc_bind(master) + { + verify (omp_proc_bind_master, omp_proc_bind_close); + #pragma omp critical + { + struct place p = places_array[0].places[0]; + int thr = omp_get_thread_num (); + printf ("#1,#3 thread 3,%d", thr); + if (omp_get_num_threads () == 4 && test_spread_master_close) + /* Outer is spread, inner master, so just bind to the + place or the master thread, which is thr 3 above. 
*/ + switch (places_array[test_places].count) + { + case 8: + case 7: + p = places_array[test_places].places[6]; + break; + case 5: + p = places_array[test_places].places[4]; + break; + case 3: + p = places_array[test_places].places[0]; + break; + case 2: + p = places_array[test_places].places[1]; + break; + } + print_affinity (p); + printf ("\n"); + } + } + /* True/spread, close. */ + #pragma omp parallel num_threads (6) proc_bind (close) + { + verify (omp_proc_bind_close, omp_proc_bind_close); + #pragma omp critical + { + struct place p = places_array[0].places[0]; + int thr = omp_get_thread_num (); + printf ("#1,#4 thread 3,%d", thr); + if (omp_get_num_threads () == 6 && test_spread_master_close) + /* Outer is spread, inner close. */ + switch (places_array[test_places].count) + { + case 8: + /* T = 6, P = 2, unit sized subpartitions. */ + p = places_array[test_places].places[6 + thr / 3]; + break; + /* The rest are T = 6, P = 1. */ + case 7: + p = places_array[test_places].places[6]; + break; + case 5: + p = places_array[test_places].places[4]; + break; + case 3: + p = places_array[test_places].places[0]; + break; + case 2: + p = places_array[test_places].places[1]; + break; + } + print_affinity (p); + printf ("\n"); + } + } + } + } + + /* Spread. */ + #pragma omp parallel num_threads (5) proc_bind(spread) + { + verify (omp_proc_bind_spread, omp_proc_bind_master); + #pragma omp critical + { + struct place p = places_array[0].places[0]; + int thr = omp_get_thread_num (); + printf ("#2 thread %d", thr); + if (omp_get_num_threads () == 5 + && (test_spread_master_close || test_true)) + switch (places_array[test_places].count) + { + case 8: + /* T = 5, P = 8, first 3 subpartitions have 2 places, last + 2 one place. */ + p = places_array[test_places].places[thr < 3 ? 2 * thr : 3 + thr]; + break; + case 7: + /* T = 5, P = 7, first 2 subpartitions have 2 places, last + 3 one place. */ + p = places_array[test_places].places[thr < 2 ? 
2 * thr : 2 + thr]; + break; + case 5: + /* T = 5, P = 5, unit sized subpartitions, each one with one + thread. */ + p = places_array[test_places].places[thr]; + break; + case 3: + /* T = 5, P = 3, unit sized subpartitions, first gets + thr0 and thr3, second thr1 and thr4, third thr2. */ + p = places_array[test_places].places[thr >= 3 ? thr - 3 : thr]; + break; + case 2: + /* T = 5, P = 2, unit sized subpartitions, first with + thr{0,1,4} and second with thr{2,3}. */ + p = places_array[test_places].places[thr == 4 ? 0 : thr / 2]; + break; + } + print_affinity (p); + printf ("\n"); + } + #pragma omp barrier + if (omp_get_thread_num () == 3) + { + int pp = 0; + switch (places_array[test_places].count) + { + case 8: pp = 6; break; + case 7: pp = 5; break; + case 5: pp = 3; break; + case 2: pp = 1; break; + } + /* Spread, spread/master. */ + #pragma omp parallel num_threads (3) firstprivate (pp) + { + verify (omp_proc_bind_spread, omp_proc_bind_close); + #pragma omp critical + { + struct place p = places_array[0].places[0]; + int thr = omp_get_thread_num (); + printf ("#2,#1 thread 3,%d", thr); + if (test_spread_master_close || test_true) + /* Outer is spread, inner spread resp. master, bit we have + just unit sized partitions. */ + p = places_array[test_places].places[pp]; + print_affinity (p); + printf ("\n"); + } + } + /* Spread, spread. */ + #pragma omp parallel num_threads (5) proc_bind (spread) \ + firstprivate (pp) + { + verify (omp_proc_bind_spread, omp_proc_bind_close); + #pragma omp critical + { + struct place p = places_array[0].places[0]; + int thr = omp_get_thread_num (); + printf ("#2,#2 thread 3,%d", thr); + if (test_spread_master_close || test_true) + /* Outer is spread, inner spread, bit we have + just unit sized partitions. */ + p = places_array[test_places].places[pp]; + print_affinity (p); + printf ("\n"); + } + } + /* Spread, master. 
*/ + #pragma omp parallel num_threads (4) proc_bind(master) \ + firstprivate(pp) + { + verify (omp_proc_bind_master, omp_proc_bind_close); + #pragma omp critical + { + struct place p = places_array[0].places[0]; + int thr = omp_get_thread_num (); + printf ("#2,#3 thread 3,%d", thr); + if (test_spread_master_close || test_true) + /* Outer is spread, inner master, bit we have + just unit sized partitions. */ + p = places_array[test_places].places[pp]; + print_affinity (p); + printf ("\n"); + } + } + /* Spread, close. */ + #pragma omp parallel num_threads (6) proc_bind (close) \ + firstprivate (pp) + { + verify (omp_proc_bind_close, omp_proc_bind_close); + #pragma omp critical + { + struct place p = places_array[0].places[0]; + int thr = omp_get_thread_num (); + printf ("#2,#4 thread 3,%d", thr); + if (test_spread_master_close || test_true) + /* Outer is spread, inner close, bit we have + just unit sized partitions. */ + p = places_array[test_places].places[pp]; + print_affinity (p); + printf ("\n"); + } + } + } + } + + /* Master. */ + #pragma omp parallel num_threads (3) proc_bind(master) + { + verify (omp_proc_bind_master, omp_proc_bind_master); + #pragma omp critical + { + struct place p = places_array[0].places[0]; + int thr = omp_get_thread_num (); + printf ("#3 thread %d", thr); + if (test_spread_master_close || test_true) + p = places_array[test_places].places[0]; + print_affinity (p); + printf ("\n"); + } + #pragma omp barrier + if (omp_get_thread_num () == 2) + { + /* Master, master. */ + #pragma omp parallel num_threads (4) + { + verify (omp_proc_bind_master, omp_proc_bind_close); + #pragma omp critical + { + struct place p = places_array[0].places[0]; + int thr = omp_get_thread_num (); + printf ("#3,#1 thread 2,%d", thr); + if (test_spread_master_close || test_true) + /* Outer is master, inner is master. */ + p = places_array[test_places].places[0]; + print_affinity (p); + printf ("\n"); + } + } + /* Master, spread. 
*/ + #pragma omp parallel num_threads (4) proc_bind (spread) + { + verify (omp_proc_bind_spread, omp_proc_bind_close); + #pragma omp critical + { + struct place p = places_array[0].places[0]; + int thr = omp_get_thread_num (); + printf ("#3,#2 thread 2,%d", thr); + if (omp_get_num_threads () == 4 + && (test_spread_master_close || test_true)) + /* Outer is master, inner is spread. */ + switch (places_array[test_places].count) + { + case 8: + /* T = 4, P = 8, each subpartition has 2 places. */ + case 7: + /* T = 4, P = 7, each subpartition has 2 places, but + last partition, which has just one place. */ + p = places_array[test_places].places[2 * thr]; + break; + case 5: + /* T = 4, P = 5, first subpartition has 2 places, the + rest just one. */ + p = places_array[test_places].places[thr ? 1 + thr : 0]; + break; + case 3: + /* T = 4, P = 3, unit sized subpartitions, first gets + thr0 and thr3, second thr1, third thr2. */ + p = places_array[test_places].places[thr == 3 ? 0 : thr]; + break; + case 2: + /* T = 4, P = 2, unit sized subpartitions, each with + 2 threads. */ + p = places_array[test_places].places[thr / 2]; + break; + } + print_affinity (p); + printf ("\n"); + } + #pragma omp barrier + if (omp_get_thread_num () == 0) + { + /* Master, spread, close. */ + #pragma omp parallel num_threads (5) proc_bind (close) + { + verify (omp_proc_bind_close, omp_proc_bind_close); + #pragma omp critical + { + struct place p = places_array[0].places[0]; + int thr = omp_get_thread_num (); + printf ("#3,#2,#1 thread 2,0,%d", thr); + if (omp_get_num_threads () == 5 + && (test_spread_master_close || test_true)) + /* Outer is master, inner spread, innermost close. */ + switch (places_array[test_places].count) + { + /* First 3 are T = 5, P = 2. */ + case 8: + case 7: + case 5: + p = places_array[test_places].places[(thr & 2) / 2]; + break; + /* All the rest are T = 5, P = 1. 
*/ + case 3: + case 2: + p = places_array[test_places].places[0]; + break; + } + print_affinity (p); + printf ("\n"); + } + } + } + #pragma omp barrier + if (omp_get_thread_num () == 3) + { + /* Master, spread, close. */ + #pragma omp parallel num_threads (5) proc_bind (close) + { + verify (omp_proc_bind_close, omp_proc_bind_close); + #pragma omp critical + { + struct place p = places_array[0].places[0]; + int thr = omp_get_thread_num (); + printf ("#3,#2,#2 thread 2,3,%d", thr); + if (omp_get_num_threads () == 5 + && (test_spread_master_close || test_true)) + /* Outer is master, inner spread, innermost close. */ + switch (places_array[test_places].count) + { + case 8: + /* T = 5, P = 2. */ + p = places_array[test_places].places[6 + + (thr & 2) / 2]; + break; + /* All the rest are T = 5, P = 1. */ + case 7: + p = places_array[test_places].places[6]; + break; + case 5: + p = places_array[test_places].places[4]; + break; + case 3: + p = places_array[test_places].places[0]; + break; + case 2: + p = places_array[test_places].places[1]; + break; + } + print_affinity (p); + printf ("\n"); + } + } + } + } + /* Master, master. */ + #pragma omp parallel num_threads (4) proc_bind(master) + { + verify (omp_proc_bind_master, omp_proc_bind_close); + #pragma omp critical + { + struct place p = places_array[0].places[0]; + int thr = omp_get_thread_num (); + printf ("#3,#3 thread 2,%d", thr); + if (test_spread_master_close || test_true) + /* Outer is master, inner master. */ + p = places_array[test_places].places[0]; + print_affinity (p); + printf ("\n"); + } + } + /* Master, close. 
*/ + #pragma omp parallel num_threads (6) proc_bind (close) + { + verify (omp_proc_bind_close, omp_proc_bind_close); + #pragma omp critical + { + struct place p = places_array[0].places[0]; + int thr = omp_get_thread_num (); + printf ("#3,#4 thread 2,%d", thr); + if (omp_get_num_threads () == 6 + && (test_spread_master_close || test_true)) + switch (places_array[test_places].count) + { + case 8: + /* T = 6, P = 8. */ + case 7: + /* T = 6, P = 7. */ + p = places_array[test_places].places[thr]; + break; + case 5: + /* T = 6, P = 5. thr{0,5} go into the first place. */ + p = places_array[test_places].places[thr == 5 ? 0 : thr]; + break; + case 3: + /* T = 6, P = 3, two threads into each place. */ + p = places_array[test_places].places[thr / 2]; + break; + case 2: + /* T = 6, P = 2, 3 threads into each place. */ + p = places_array[test_places].places[thr / 3]; + break; + } + print_affinity (p); + printf ("\n"); + } + } + } + } + + #pragma omp parallel num_threads (5) proc_bind(close) + { + verify (omp_proc_bind_close, omp_proc_bind_master); + #pragma omp critical + { + struct place p = places_array[0].places[0]; + int thr = omp_get_thread_num (); + printf ("#4 thread %d", thr); + if (omp_get_num_threads () == 5 + && (test_spread_master_close || test_true)) + switch (places_array[test_places].count) + { + case 8: + /* T = 5, P = 8. */ + case 7: + /* T = 5, P = 7. */ + case 5: + /* T = 5, P = 5. */ + p = places_array[test_places].places[thr]; + break; + case 3: + /* T = 5, P = 3, thr{0,3} in first place, thr{1,4} in second, + thr2 in third. */ + p = places_array[test_places].places[thr >= 3 ? thr - 3 : thr]; + break; + case 2: + /* T = 5, P = 2, thr{0,1,4} in first place, thr{2,3} in second. */ + p = places_array[test_places].places[thr == 4 ? 
0 : thr / 2]; + break; + } + print_affinity (p); + printf ("\n"); + } + #pragma omp barrier + if (omp_get_thread_num () == 2) + { + int pp = 0; + switch (places_array[test_places].count) + { + case 8: + case 7: + case 5: + case 3: + pp = 2; + break; + case 2: + pp = 1; + break; + } + /* Close, close/master. */ + #pragma omp parallel num_threads (4) firstprivate (pp) + { + verify (omp_proc_bind_close, omp_proc_bind_close); + #pragma omp critical + { + struct place p = places_array[0].places[0]; + int thr = omp_get_thread_num (); + printf ("#4,#1 thread 2,%d", thr); + if (test_spread_master_close) + /* Outer is close, inner is master. */ + p = places_array[test_places].places[pp]; + else if (omp_get_num_threads () == 4 && test_true) + /* Outer is close, inner is close. */ + switch (places_array[test_places].count) + { + case 8: + /* T = 4, P = 8. */ + case 7: + /* T = 4, P = 7. */ + p = places_array[test_places].places[2 + thr]; + break; + case 5: + /* T = 4, P = 5. There is wrap-around for thr3. */ + p = places_array[test_places].places[thr == 3 ? 0 : 2 + thr]; + break; + case 3: + /* T = 4, P = 3, thr{0,3} go into p2, thr1 into p0, thr2 + into p1. */ + p = places_array[test_places].places[(2 + thr) % 3]; + break; + case 2: + /* T = 4, P = 2, 2 threads into each place. */ + p = places_array[test_places].places[1 - thr / 2]; + break; + } + + print_affinity (p); + printf ("\n"); + } + } + /* Close, spread. */ + #pragma omp parallel num_threads (4) proc_bind (spread) + { + verify (omp_proc_bind_spread, omp_proc_bind_close); + #pragma omp critical + { + struct place p = places_array[0].places[0]; + int thr = omp_get_thread_num (); + printf ("#4,#2 thread 2,%d", thr); + if (omp_get_num_threads () == 4 + && (test_spread_master_close || test_true)) + /* Outer is close, inner is spread. */ + switch (places_array[test_places].count) + { + case 8: + /* T = 4, P = 8, each subpartition has 2 places. 
*/ + case 7: + /* T = 4, P = 7, each subpartition has 2 places, but + last partition, which has just one place. */ + p = places_array[test_places].places[thr == 3 ? 0 + : 2 + 2 * thr]; + break; + case 5: + /* T = 4, P = 5, first subpartition has 2 places, the + rest just one. */ + p = places_array[test_places].places[thr == 3 ? 0 + : 2 + thr]; + break; + case 3: + /* T = 4, P = 3, unit sized subpartitions, third gets + thr0 and thr3, first thr1, second thr2. */ + p = places_array[test_places].places[thr == 0 ? 2 : thr - 1]; + break; + case 2: + /* T = 4, P = 2, unit sized subpartitions, each with + 2 threads. */ + p = places_array[test_places].places[1 - thr / 2]; + break; + } + print_affinity (p); + printf ("\n"); + } + #pragma omp barrier + if (omp_get_thread_num () == 0) + { + /* Close, spread, close. */ + #pragma omp parallel num_threads (5) proc_bind (close) + { + verify (omp_proc_bind_close, omp_proc_bind_close); + #pragma omp critical + { + struct place p = places_array[0].places[0]; + int thr = omp_get_thread_num (); + printf ("#4,#2,#1 thread 2,0,%d", thr); + if (omp_get_num_threads () == 5 + && (test_spread_master_close || test_true)) + /* Outer is close, inner spread, innermost close. */ + switch (places_array[test_places].count) + { + case 8: + case 7: + /* T = 5, P = 2. */ + p = places_array[test_places].places[2 + + (thr & 2) / 2]; + break; + /* All the rest are T = 5, P = 1. */ + case 5: + case 3: + p = places_array[test_places].places[2]; + break; + case 2: + p = places_array[test_places].places[1]; + break; + } + print_affinity (p); + printf ("\n"); + } + } + } + #pragma omp barrier + if (omp_get_thread_num () == 2) + { + /* Close, spread, close. 
*/ + #pragma omp parallel num_threads (5) proc_bind (close) + { + verify (omp_proc_bind_close, omp_proc_bind_close); + #pragma omp critical + { + struct place p = places_array[0].places[0]; + int thr = omp_get_thread_num (); + printf ("#4,#2,#2 thread 2,2,%d", thr); + if (omp_get_num_threads () == 5 + && (test_spread_master_close || test_true)) + /* Outer is close, inner spread, innermost close. */ + switch (places_array[test_places].count) + { + case 8: + /* T = 5, P = 2. */ + p = places_array[test_places].places[6 + + (thr & 2) / 2]; + break; + /* All the rest are T = 5, P = 1. */ + case 7: + p = places_array[test_places].places[6]; + break; + case 5: + p = places_array[test_places].places[4]; + break; + case 3: + p = places_array[test_places].places[1]; + break; + case 2: + p = places_array[test_places].places[0]; + break; + } + print_affinity (p); + printf ("\n"); + } + } + } + #pragma omp barrier + if (omp_get_thread_num () == 3) + { + /* Close, spread, close. */ + #pragma omp parallel num_threads (5) proc_bind (close) + { + verify (omp_proc_bind_close, omp_proc_bind_close); + #pragma omp critical + { + struct place p = places_array[0].places[0]; + int thr = omp_get_thread_num (); + printf ("#4,#2,#3 thread 2,3,%d", thr); + if (omp_get_num_threads () == 5 + && (test_spread_master_close || test_true)) + /* Outer is close, inner spread, innermost close. */ + switch (places_array[test_places].count) + { + case 8: + case 7: + case 5: + /* T = 5, P = 2. */ + p = places_array[test_places].places[(thr & 2) / 2]; + break; + /* All the rest are T = 5, P = 1. */ + case 3: + p = places_array[test_places].places[2]; + break; + case 2: + p = places_array[test_places].places[0]; + break; + } + print_affinity (p); + printf ("\n"); + } + } + } + } + /* Close, master. 
*/ + #pragma omp parallel num_threads (4) proc_bind(master) \ + firstprivate (pp) + { + verify (omp_proc_bind_master, omp_proc_bind_close); + #pragma omp critical + { + struct place p = places_array[0].places[0]; + int thr = omp_get_thread_num (); + printf ("#4,#3 thread 2,%d", thr); + if (test_spread_master_close || test_true) + /* Outer is close, inner master. */ + p = places_array[test_places].places[pp]; + print_affinity (p); + printf ("\n"); + } + } + /* Close, close. */ + #pragma omp parallel num_threads (6) proc_bind (close) + { + verify (omp_proc_bind_close, omp_proc_bind_close); + #pragma omp critical + { + struct place p = places_array[0].places[0]; + int thr = omp_get_thread_num (); + printf ("#4,#4 thread 2,%d", thr); + if (omp_get_num_threads () == 6 + && (test_spread_master_close || test_true)) + switch (places_array[test_places].count) + { + case 8: + /* T = 6, P = 8. */ + p = places_array[test_places].places[2 + thr]; + break; + case 7: + /* T = 6, P = 7. */ + p = places_array[test_places].places[thr == 5 ? 0 : 2 + thr]; + break; + case 5: + /* T = 6, P = 5. thr{0,5} go into the third place. */ + p = places_array[test_places].places[thr >= 3 ? thr - 3 + : 2 + thr]; + break; + case 3: + /* T = 6, P = 3, two threads into each place. */ + p = places_array[test_places].places[thr < 2 ? 2 + : thr / 2 - 1]; + break; + case 2: + /* T = 6, P = 2, 3 threads into each place. 
*/ + p = places_array[test_places].places[1 - thr / 3]; + break; + } + print_affinity (p); + printf ("\n"); + } + } + } + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/atomic-14.c b/libgomp/testsuite/libgomp.c/atomic-14.c index 5936650..9046d80 100644 --- a/libgomp/testsuite/libgomp.c/atomic-14.c +++ b/libgomp/testsuite/libgomp.c/atomic-14.c @@ -16,7 +16,7 @@ main () #pragma omp atomic update x = x + 7; #pragma omp atomic - x = x + 7 + 6; + x = x + (7 + 6); #pragma omp atomic update x = x + 2 * 3; #pragma omp atomic @@ -65,7 +65,7 @@ main () if (v != -8) abort (); #pragma omp atomic - x = x * -4 / 2; + x = x * (-4 / 2); #pragma omp atomic read v = x; if (v != 16) diff --git a/libgomp/testsuite/libgomp.c/atomic-15.c b/libgomp/testsuite/libgomp.c/atomic-15.c new file mode 100644 index 0000000..58331f4 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/atomic-15.c @@ -0,0 +1,99 @@ +// { dg-do run } + +extern void abort (void); +int x = 6; + +int +main () +{ + int v, l = 2, s = 1; + #pragma omp atomic + x = -3 + x; + #pragma omp atomic read + v = x; + if (v != 3) + abort (); + #pragma omp atomic update + x = 3 * 2 * 1 + x; + #pragma omp atomic read + v = x; + if (v != 9) + abort (); + #pragma omp atomic capture + v = x = x | 16; + if (v != 25) + abort (); + #pragma omp atomic capture + v = x = x + 14 * 2 / 4; + if (v != 32) + abort (); + #pragma omp atomic capture + v = x = 5 | x; + if (v != 37) + abort (); + #pragma omp atomic capture + v = x = 40 + 12 - 2 - 7 - x; + if (v != 6) + abort (); + #pragma omp atomic read + v = x; + if (v != 6) + abort (); + #pragma omp atomic capture + { v = x; x = 3 + x; } + if (v != 6) + abort (); + #pragma omp atomic capture + { v = x; x = -1 * -1 * -1 * -1 - x; } + if (v != 9) + abort (); + #pragma omp atomic read + v = x; + if (v != -8) + abort (); + #pragma omp atomic capture + { x = 2 * 2 - x; v = x; } + if (v != 12) + abort (); + #pragma omp atomic capture + { x = 7 & x; v = x; } + if (v != 4) + abort (); + #pragma omp 
atomic capture + { v = x; x = 6; } + if (v != 4) + abort (); + #pragma omp atomic read + v = x; + if (v != 6) + abort (); + #pragma omp atomic capture + { v = x; x = 7 * 8 + 23; } + if (v != 6) + abort (); + #pragma omp atomic read + v = x; + if (v != 79) + abort (); + #pragma omp atomic capture + { v = x; x = 23 + 6 * 4; } + if (v != 79) + abort (); + #pragma omp atomic read + v = x; + if (v != 47) + abort (); + #pragma omp atomic capture + { v = x; x = l ? 17 : 12; } + if (v != 47) + abort (); + #pragma omp atomic capture + { v = x; x = l = s++ + 3; } + if (v != 17 || l != 4 || s != 2) + abort (); + #pragma omp atomic read + v = x; + if (v != 4) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/atomic-16.c b/libgomp/testsuite/libgomp.c/atomic-16.c new file mode 100644 index 0000000..d33f670 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/atomic-16.c @@ -0,0 +1,58 @@ +// { dg-do run } + +extern void abort (void); +int x = 6, cnt; + +int +foo (void) +{ + return cnt++; +} + +int +main () +{ + int v, *p; + p = &x; + #pragma omp atomic update + p[foo (), 0] = 16 + 6 - p[foo (), 0]; + #pragma omp atomic read + v = x; + if (cnt != 2 || v != 16) + abort (); + #pragma omp atomic capture + v = p[foo () + foo (), 0] = p[foo () + foo (), 0] + 3; + if (cnt != 6 || v != 19) + abort (); + #pragma omp atomic capture + v = p[foo (), 0] = 12 * 1 / 2 + (foo (), 0) + p[foo (), 0]; + if (cnt != 9 || v != 25) + abort (); + #pragma omp atomic capture + { + v = p[foo () & 0]; p[foo () & 0] = (foo (), 1) * 9 - p[foo () & 0]; + } + if (cnt != 13 || v != 25) + abort (); + #pragma omp atomic read + v = x; + if (v != -16) + abort (); + #pragma omp atomic capture + { + p[0 & foo ()] = 16 - 2 + 3 + p[0 & foo ()]; v = p[0 & foo ()]; + } + if (cnt != 16 || v != 1) + abort (); + #pragma omp atomic capture + { + v = p[foo (), 0]; p[foo (), 0] = (foo (), 7) ? 
13 : foo () + 6; + } + if (cnt != 19 || v != 1) + abort (); + #pragma omp atomic read + v = x; + if (v != 13) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/atomic-17.c b/libgomp/testsuite/libgomp.c/atomic-17.c new file mode 100644 index 0000000..2bd0e9b --- /dev/null +++ b/libgomp/testsuite/libgomp.c/atomic-17.c @@ -0,0 +1,99 @@ +// { dg-do run } + +extern void abort (void); +int x = 6; + +int +main () +{ + int v, l = 2, s = 1; + #pragma omp atomic seq_cst + x = -3 + x; + #pragma omp atomic read seq_cst + v = x; + if (v != 3) + abort (); + #pragma omp atomic update seq_cst + x = 3 * 2 * 1 + x; + #pragma omp atomic read seq_cst + v = x; + if (v != 9) + abort (); + #pragma omp atomic capture seq_cst + v = x = x | 16; + if (v != 25) + abort (); + #pragma omp atomic capture seq_cst + v = x = x + 14 * 2 / 4; + if (v != 32) + abort (); + #pragma omp atomic capture seq_cst + v = x = 5 | x; + if (v != 37) + abort (); + #pragma omp atomic capture seq_cst + v = x = 40 + 12 - 2 - 7 - x; + if (v != 6) + abort (); + #pragma omp atomic read seq_cst + v = x; + if (v != 6) + abort (); + #pragma omp atomic capture seq_cst + { v = x; x = 3 + x; } + if (v != 6) + abort (); + #pragma omp atomic capture seq_cst + { v = x; x = -1 * -1 * -1 * -1 - x; } + if (v != 9) + abort (); + #pragma omp atomic read seq_cst + v = x; + if (v != -8) + abort (); + #pragma omp atomic capture seq_cst + { x = 2 * 2 - x; v = x; } + if (v != 12) + abort (); + #pragma omp atomic capture seq_cst + { x = 7 & x; v = x; } + if (v != 4) + abort (); + #pragma omp atomic capture seq_cst + { v = x; x = 6; } + if (v != 4) + abort (); + #pragma omp atomic read seq_cst + v = x; + if (v != 6) + abort (); + #pragma omp atomic capture seq_cst + { v = x; x = 7 * 8 + 23; } + if (v != 6) + abort (); + #pragma omp atomic read seq_cst + v = x; + if (v != 79) + abort (); + #pragma omp atomic capture seq_cst + { v = x; x = 23 + 6 * 4; } + if (v != 79) + abort (); + #pragma omp atomic read seq_cst + v = x; + 
if (v != 47) + abort (); + #pragma omp atomic capture seq_cst + { v = x; x = l ? 17 : 12; } + if (v != 47) + abort (); + #pragma omp atomic capture seq_cst + { v = x; x = l = s++ + 3; } + if (v != 17 || l != 4 || s != 2) + abort (); + #pragma omp atomic read seq_cst + v = x; + if (v != 4) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/cancel-for-1.c b/libgomp/testsuite/libgomp.c/cancel-for-1.c new file mode 100644 index 0000000..f805f13 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/cancel-for-1.c @@ -0,0 +1,22 @@ +/* { dg-do run } */ +/* { dg-set-target-env-var OMP_CANCELLATION "true" } */ + +#include <stdlib.h> +#include <omp.h> + +int +main () +{ + #pragma omp parallel num_threads (32) + { + int i; + #pragma omp for + for (i = 0; i < 1000; ++i) + { + #pragma omp cancel for + if (omp_get_cancellation ()) + abort (); + } + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/cancel-for-2.c b/libgomp/testsuite/libgomp.c/cancel-for-2.c new file mode 100644 index 0000000..30cfbb1 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/cancel-for-2.c @@ -0,0 +1,95 @@ +/* { dg-do run } */ +/* { dg-set-target-env-var OMP_CANCELLATION "true" } */ + +#include <stdlib.h> +#include <omp.h> + +__attribute__((noinline, noclone)) int +foo (int *x) +{ + int v = 0, w = 0; + #pragma omp parallel num_threads (32) shared (v, w) + { + int i; + #pragma omp for + for (i = 0; i < 1000; ++i) + { + #pragma omp cancel for if (x[0]) + abort (); + } + #pragma omp for + for (i = 0; i < 1000; ++i) + { + #pragma omp cancel for if (x[1]) + #pragma omp atomic + v++; + } + #pragma omp for + for (i = 0; i < 1000; ++i) + { + #pragma omp cancel for if (x[2]) + #pragma omp atomic + w += 8; + } + #pragma omp for + for (i = 0; i < 1000; ++i) + { + #pragma omp cancel for if (x[3]) + #pragma omp atomic + v += 2; + } + } + if (v != 3000 || w != 0) + abort (); + #pragma omp parallel num_threads (32) shared (v, w) + { + int i; + /* None of these cancel directives should actually cancel 
anything, + but the compiler shouldn't know that and thus should use cancellable + barriers at the end of all the workshares. */ + #pragma omp cancel parallel if (omp_get_thread_num () == 1 && x[4]) + #pragma omp for + for (i = 0; i < 1000; ++i) + { + #pragma omp cancel for if (x[0]) + abort (); + } + #pragma omp cancel parallel if (omp_get_thread_num () == 2 && x[4]) + #pragma omp for + for (i = 0; i < 1000; ++i) + { + #pragma omp cancel for if (x[1]) + #pragma omp atomic + v++; + } + #pragma omp cancel parallel if (omp_get_thread_num () == 3 && x[4]) + #pragma omp for + for (i = 0; i < 1000; ++i) + { + #pragma omp cancel for if (x[2]) + #pragma omp atomic + w += 8; + } + #pragma omp cancel parallel if (omp_get_thread_num () == 4 && x[4]) + #pragma omp for + for (i = 0; i < 1000; ++i) + { + #pragma omp cancel for if (x[3]) + #pragma omp atomic + v += 2; + } + #pragma omp cancel parallel if (omp_get_thread_num () == 5 && x[4]) + } + if (v != 6000 || w != 0) + abort (); + return 0; +} + +int +main () +{ + int x[] = { 1, 0, 1, 0, 0 }; + if (omp_get_cancellation ()) + foo (x); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/cancel-parallel-1.c b/libgomp/testsuite/libgomp.c/cancel-parallel-1.c new file mode 100644 index 0000000..614eb50 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/cancel-parallel-1.c @@ -0,0 +1,17 @@ +/* { dg-do run } */ +/* { dg-set-target-env-var OMP_CANCELLATION "true" } */ + +#include <stdlib.h> +#include <omp.h> + +int +main () +{ + #pragma omp parallel num_threads (32) + { + #pragma omp cancel parallel + if (omp_get_cancellation ()) + abort (); + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/cancel-parallel-2.c b/libgomp/testsuite/libgomp.c/cancel-parallel-2.c new file mode 100644 index 0000000..cae0aa4 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/cancel-parallel-2.c @@ -0,0 +1,53 @@ +/* { dg-do run } */ +/* { dg-set-target-env-var OMP_CANCELLATION "true" } */ + +#include <stdlib.h> +#include <unistd.h> +#include <omp.h> 
+ +static void +foo (int *x) +{ + #pragma omp parallel firstprivate(x) num_threads (32) + { + int thr = omp_get_thread_num (); + switch (x[thr]) + { + case 4: + #pragma omp cancel parallel + break; + case 3: + #pragma omp task + usleep (1000); + #pragma omp task + usleep (2000); + #pragma omp task + usleep (4000); + break; + case 2: + usleep (1000); + /* FALLTHRU */ + case 1: + #pragma omp cancellation point parallel + break; + } + #pragma omp barrier + if (omp_get_cancellation ()) + abort (); + } +} + +int +main () +{ + int i, j, x[32] = { 0, 1, 2, 4, 2, 2, 1, 0 }; + foo (x); + for (i = 0; i < 32; i++) + { + for (j = 0; j < 32; j++) + x[j] = rand () & 3; + x[rand () & 31] = 4; + foo (x); + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/cancel-parallel-3.c b/libgomp/testsuite/libgomp.c/cancel-parallel-3.c new file mode 100644 index 0000000..7ceaed1 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/cancel-parallel-3.c @@ -0,0 +1,39 @@ +/* { dg-do run } */ +/* { dg-set-target-env-var OMP_CANCELLATION "true" } */ + +#include <omp.h> +#include <unistd.h> + +static inline void +do_some_work (void) +{ + asm volatile ("" : : : "memory"); +} + +int +main () +{ + omp_set_dynamic (0); + omp_set_schedule (omp_sched_static, 1); + #pragma omp parallel num_threads (16) + { + int i, j; + do_some_work (); + #pragma omp barrier + if (omp_get_thread_num () == 1) + { + sleep (2); + #pragma omp cancellation point parallel + } + for (j = 3; j <= 16; j++) + #pragma omp for schedule (runtime) nowait + for (i = 0; i < j; i++) + do_some_work (); + if (omp_get_thread_num () == 0) + { + sleep (1); + #pragma omp cancel parallel + } + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/cancel-sections-1.c b/libgomp/testsuite/libgomp.c/cancel-sections-1.c new file mode 100644 index 0000000..e0cef0b --- /dev/null +++ b/libgomp/testsuite/libgomp.c/cancel-sections-1.c @@ -0,0 +1,38 @@ +/* { dg-do run } */ +/* { dg-set-target-env-var OMP_CANCELLATION "true" } */ + +#include 
<stdlib.h> +#include <omp.h> + +int +main () +{ + if (!omp_get_cancellation ()) + return 0; + #pragma omp parallel num_threads (32) + { + #pragma omp sections + { + { + #pragma omp cancel sections + abort (); + } + #pragma omp section + { + #pragma omp cancel sections + abort (); + } + #pragma omp section + { + #pragma omp cancel sections + abort (); + } + #pragma omp section + { + #pragma omp cancel sections + abort (); + } + } + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/cancel-taskgroup-1.c b/libgomp/testsuite/libgomp.c/cancel-taskgroup-1.c new file mode 100644 index 0000000..5a80811 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/cancel-taskgroup-1.c @@ -0,0 +1,70 @@ +/* { dg-do run } */ +/* { dg-set-target-env-var OMP_CANCELLATION "true" } */ + +#include <stdlib.h> +#include <omp.h> + +struct T { struct T *children[2]; int val; }; + +struct T * +search (struct T *tree, int val, int lvl) +{ + if (tree == NULL || tree->val == val) + return tree; + struct T *ret = NULL; + int i; + for (i = 0; i < 2; i++) + #pragma omp task shared(ret) if(lvl < 10) + { + struct T *r = search (tree->children[i], val, lvl + 1); + if (r) + { + #pragma omp atomic write + ret = r; + #pragma omp cancel taskgroup + } + } + #pragma omp taskwait + return ret; +} + +struct T * +searchp (struct T *tree, int val) +{ + struct T *ret; + #pragma omp parallel shared(ret) firstprivate (tree, val) + #pragma omp single + #pragma omp taskgroup + ret = search (tree, val, 0); + return ret; +} + +int +main () +{ + /* Must be power of two minus 1. */ + int size = 0x7ffff; + struct T *trees = (struct T *) malloc (size * sizeof (struct T)); + if (trees == NULL) + return 0; + int i, l = 1, b = 0; + for (i = 0; i < size; i++) + { + if (i == l) + { + b = l; + l = l * 2 + 1; + } + trees[i].val = i; + trees[i].children[0] = l == size ? NULL : &trees[l + (i - b) * 2]; + trees[i].children[1] = l == size ? 
NULL : &trees[l + (i - b) * 2 + 1]; + } + for (i = 0; i < 50; i++) + { + int v = random () & size; + if (searchp (&trees[0], v) != &trees[v]) + abort (); + } + free (trees); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/cancel-taskgroup-2.c b/libgomp/testsuite/libgomp.c/cancel-taskgroup-2.c new file mode 100644 index 0000000..c7b8bf7 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/cancel-taskgroup-2.c @@ -0,0 +1,37 @@ +/* { dg-do run } */ +/* { dg-set-target-env-var OMP_CANCELLATION "true" } */ + +#include <stdlib.h> +#include <unistd.h> +#include <omp.h> + +int +main () +{ + #pragma omp parallel + #pragma omp taskgroup + #pragma omp task + { + #pragma omp cancel taskgroup + if (omp_get_cancellation ()) + abort (); + } + #pragma omp parallel + { + #pragma omp barrier + #pragma omp single + #pragma omp taskgroup + { + int i; + for (i = 0; i < 50; i++) + #pragma omp task + { + #pragma omp cancellation point taskgroup + usleep (30); + #pragma omp cancel taskgroup if (i > 5) + } + } + usleep (10); + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/depend-1.c b/libgomp/testsuite/libgomp.c/depend-1.c new file mode 100644 index 0000000..2db1205 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/depend-1.c @@ -0,0 +1,215 @@ +#include <stdlib.h> + +void +dep (void) +{ + int x = 1; + #pragma omp parallel + #pragma omp single + { + #pragma omp task shared (x) depend(out: x) + x = 2; + #pragma omp task shared (x) depend(in: x) + if (x != 2) + abort (); + } +} + +void +dep2 (void) +{ + #pragma omp parallel + #pragma omp single + { + int x = 1; + #pragma omp task shared (x) depend(out: x) + x = 2; + #pragma omp task shared (x) depend(in: x) + if (x != 2) + abort (); + #pragma omp taskwait + } +} + +void +dep3 (void) +{ + #pragma omp parallel + { + int x = 1; + #pragma omp single + { + #pragma omp task shared (x) depend(out: x) + x = 2; + #pragma omp task shared (x) depend(in: x) + if (x != 2) + abort (); + } + } +} + +void +firstpriv (void) +{ + #pragma omp 
parallel + #pragma omp single + { + int x = 1; + #pragma omp task depend(out: x) + x = 2; + #pragma omp task depend(in: x) + if (x != 1) + abort (); + } +} + +void +antidep (void) +{ + int x = 1; + #pragma omp parallel + #pragma omp single + { + #pragma omp task shared(x) depend(in: x) + if (x != 1) + abort (); + #pragma omp task shared(x) depend(out: x) + x = 2; + } +} + +void +antidep2 (void) +{ + #pragma omp parallel + #pragma omp single + { + int x = 1; + #pragma omp taskgroup + { + #pragma omp task shared(x) depend(in: x) + if (x != 1) + abort (); + #pragma omp task shared(x) depend(out: x) + x = 2; + } + } +} + +void +antidep3 (void) +{ + #pragma omp parallel + { + int x = 1; + #pragma omp single + { + #pragma omp task shared(x) depend(in: x) + if (x != 1) + abort (); + #pragma omp task shared(x) depend(out: x) + x = 2; + } + } +} + + +void +outdep (void) +{ + #pragma omp parallel + #pragma omp single + { + int x = 0; + #pragma omp task shared(x) depend(out: x) + x = 1; + #pragma omp task shared(x) depend(out: x) + x = 2; + #pragma omp taskwait + if (x != 2) + abort (); + } +} + +void +concurrent (void) +{ + int x = 1; + #pragma omp parallel + #pragma omp single + { + #pragma omp task shared (x) depend(out: x) + x = 2; + #pragma omp task shared (x) depend(in: x) + if (x != 2) + abort (); + #pragma omp task shared (x) depend(in: x) + if (x != 2) + abort (); + #pragma omp task shared (x) depend(in: x) + if (x != 2) + abort (); + } +} + +void +concurrent2 (void) +{ + #pragma omp parallel + #pragma omp single + { + int x = 1; + #pragma omp task shared (x) depend(out: x) + x = 2; + #pragma omp task shared (x) depend(in: x) + if (x != 2) + abort (); + #pragma omp task shared (x) depend(in: x) + if (x != 2) + abort (); + #pragma omp task shared (x) depend(in: x) + if (x != 2) + abort (); + #pragma omp taskwait + } +} + +void +concurrent3 (void) +{ + #pragma omp parallel + { + int x = 1; + #pragma omp single + { + #pragma omp task shared (x) depend(out: x) + x = 2; + 
#pragma omp task shared (x) depend(in: x) + if (x != 2) + abort (); + #pragma omp task shared (x) depend(in: x) + if (x != 2) + abort (); + #pragma omp task shared (x) depend(in: x) + if (x != 2) + abort (); + } + } +} + +int +main () +{ + dep (); + dep2 (); + dep3 (); + firstpriv (); + antidep (); + antidep2 (); + antidep3 (); + outdep (); + concurrent (); + concurrent2 (); + concurrent3 (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/depend-2.c b/libgomp/testsuite/libgomp.c/depend-2.c new file mode 100644 index 0000000..2772309 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/depend-2.c @@ -0,0 +1,71 @@ +#include <stdlib.h> +#include <unistd.h> + +void +foo (int do_sleep) +{ + int a[64], i, *p = a + 4, x = 0; + asm volatile ("" : "+r" (p)); + for (i = 0; i < 64; i++) + a[i] = i + 8; + #pragma omp parallel private (i) + { + #pragma omp single nowait + { + for (i = 0; i < 8; i++) + { + #pragma omp task depend(out: a[i * 8 : 4]) + a[i * 8] += (i + 2) * 9; + #pragma omp task depend(out: p[i * 8 : 2]) + p[i * 8] += (i + 3) * 10; + #pragma omp task depend(out: x) + x = 1; + } + for (i = 0; i < 8; i++) + #pragma omp task depend(in: a[i * 8 : 4]) \ + depend(inout: a[i * 8 + 4 : 2]) \ + depend(in: a[0 : 4]) depend(in: x) + { + if (a[0] != 8 + 2 * 9 || x != 1) + abort (); + if (a[i * 8] != i * 8 + 8 + (i + 2) * 9) + abort (); + if (a[4 + i * 8] != 4 + i * 8 + 8 + (i + 3) * 10) + abort (); + p[i * 8] += a[i * 8]; + } + for (i = 0; i < 8; i++) + #pragma omp task depend(inout: a[i * 8 : 4]) \ + depend(in: p[i * 8 : 2]) \ + depend(in: p[0 : 2], x) + { + if (p[0] != 4 + 8 + 3 * 10 + 0 + 8 + 2 * 9 || x != 1) + abort (); + if (a[i * 8] != i * 8 + 8 + (i + 2) * 9) + abort (); + if (a[4 + i * 8] != (4 + i * 8 + 8 + (i + 3) * 10 + + i * 8 + 8 + (i + 2) * 9)) + abort (); + a[i * 8] += 2; + } + for (i = 0; i < 4; i++) + #pragma omp task depend(in: a[i * 16 : 4], a[i * 16 + 8 : 4], x) + { + if (a[i * 16] != i * 16 + 8 + (2 * i + 2) * 9 + 2 || x != 1) + abort (); + if (p[i * 
16 + 4] != i * 16 + 8 + 8 + (2 * i + 1 + 2) * 9 + 2) + abort (); + } + } + if (do_sleep) + sleep (1); + } +} + +int +main () +{ + foo (1); + foo (0); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/depend-3.c b/libgomp/testsuite/libgomp.c/depend-3.c new file mode 100644 index 0000000..d565d6e --- /dev/null +++ b/libgomp/testsuite/libgomp.c/depend-3.c @@ -0,0 +1,51 @@ +#include <stdlib.h> +#include <unistd.h> + +int +main () +{ + #pragma omp parallel + #pragma omp single + { + int x = 1, y = 2; + #pragma omp taskgroup + { + #pragma omp task shared (x) depend(in: x) + { + usleep (10000); + if (x != 1) + abort (); + } + #pragma omp taskgroup + { + #pragma omp task shared (x) depend(in: x) + { + usleep (15000); + if (x != 1) + abort (); + } + #pragma omp task shared (y) depend(inout: y) + { + if (y != 2) + abort (); + y = 3; + } + #pragma omp taskgroup + { + #pragma omp task shared (x) depend(in: x) + { + usleep (13000); + if (x != 1) + abort (); + } + #pragma omp taskgroup + { + #pragma omp task shared (x) depend(out: x) + x = 2; + } + } + } + } + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/depend-4.c b/libgomp/testsuite/libgomp.c/depend-4.c new file mode 100644 index 0000000..a4395ea --- /dev/null +++ b/libgomp/testsuite/libgomp.c/depend-4.c @@ -0,0 +1,56 @@ +#include <stdlib.h> +#include <unistd.h> + +int +main () +{ + #pragma omp parallel + #pragma omp single + { + int x = 1, y = 2, z = 3; + #pragma omp taskgroup + { + #pragma omp task shared (x, y, z) depend(inout: x, y) \ + depend (in: z) if (x > 10) + { + if (x != 1 || y != 2 || z != 3) + abort (); + x = 4; + y = 5; + } + /* The above task has depend clauses, but no dependencies + on earlier tasks, and is if (0), so must be scheduled + immediately. 
*/ + if (x != 4 || y != 5) + abort (); + } + #pragma omp taskgroup + { + #pragma omp task shared (x, y) depend(in: x, y) + { + usleep (10000); + if (x != 4 || y != 5 || z != 3) + abort (); + } + #pragma omp task shared (x, y) depend(in: x, y) + { + usleep (10000); + if (x != 4 || y != 5 || z != 3) + abort (); + } + #pragma omp task shared (x, y, z) depend(inout: x, y) \ + depend (in: z) if (x > 10) + { + if (x != 4 || y != 5 || z != 3) + abort (); + x = 6; + y = 7; + } + /* The above task has depend clauses, and may have dependencies + on earlier tasks, while it is if (0), it can be deferred. */ + } + if (x != 6 || y != 7) + abort (); + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/for-1.c b/libgomp/testsuite/libgomp.c/for-1.c new file mode 100644 index 0000000..e702453 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/for-1.c @@ -0,0 +1,35 @@ +/* { dg-options "-std=gnu99 -fopenmp" } */ + +extern void abort (void); + +#define M(x, y, z) O(x, y, z) +#define O(x, y, z) x ## _ ## y ## _ ## z + +#define F parallel for +#define G pf +#include "for-1.h" +#undef F +#undef G + +#define F for +#define G f +#include "for-1.h" +#undef F +#undef G + +int +main () +{ + if (test_pf_static () + || test_pf_static32 () + || test_pf_auto () + || test_pf_guided32 () + || test_pf_runtime () + || test_f_static () + || test_f_static32 () + || test_f_auto () + || test_f_guided32 () + || test_f_runtime ()) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/for-1.h b/libgomp/testsuite/libgomp.c/for-1.h new file mode 100644 index 0000000..fa82c5b --- /dev/null +++ b/libgomp/testsuite/libgomp.c/for-1.h @@ -0,0 +1,25 @@ +#define S +#define N(x) M(x, G, static) +#include "for-2.h" +#undef S +#undef N +#define S schedule(static, 32) +#define N(x) M(x, G, static32) +#include "for-2.h" +#undef S +#undef N +#define S schedule(auto) +#define N(x) M(x, G, auto) +#include "for-2.h" +#undef S +#undef N +#define S schedule(guided, 32) +#define N(x) M(x, G, guided32) +#include 
"for-2.h" +#undef S +#undef N +#define S schedule(runtime) +#define N(x) M(x, G, runtime) +#include "for-2.h" +#undef S +#undef N diff --git a/libgomp/testsuite/libgomp.c/for-2.c b/libgomp/testsuite/libgomp.c/for-2.c new file mode 100644 index 0000000..f5a01ab --- /dev/null +++ b/libgomp/testsuite/libgomp.c/for-2.c @@ -0,0 +1,46 @@ +/* { dg-options "-std=gnu99 -fopenmp" } */ + +extern void abort (void); + +#define M(x, y, z) O(x, y, z) +#define O(x, y, z) x ## _ ## y ## _ ## z + +#define F simd +#define G simd +#define S +#define N(x) M(x, G, normal) +#include "for-2.h" +#undef S +#undef N +#undef F +#undef G + +#define F parallel for simd +#define G pf_simd +#include "for-1.h" +#undef F +#undef G + +#define F for simd +#define G f_simd +#include "for-1.h" +#undef F +#undef G + +int +main () +{ + if (test_simd_normal () + || test_pf_simd_static () + || test_pf_simd_static32 () + || test_pf_simd_auto () + || test_pf_simd_guided32 () + || test_pf_simd_runtime () + || test_f_simd_static () + || test_f_simd_static32 () + || test_f_simd_auto () + || test_f_simd_guided32 () + || test_f_simd_runtime ()) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/for-2.h b/libgomp/testsuite/libgomp.c/for-2.h new file mode 100644 index 0000000..57c385e --- /dev/null +++ b/libgomp/testsuite/libgomp.c/for-2.h @@ -0,0 +1,269 @@ +#ifndef VARS +#define VARS +int a[1500]; +float b[10][15][10]; +__attribute__((noreturn)) void +noreturn (void) +{ + for (;;); +} +#endif + +__attribute__((noinline, noclone)) void +N(f0) (void) +{ + int i; +#pragma omp F S + for (i = 0; i < 1500; i++) + a[i] += 2; +} + +__attribute__((noinline, noclone)) void +N(f1) (void) +{ +#pragma omp F S + for (unsigned int i = __INT_MAX__; i < 3000U + __INT_MAX__; i += 2) + a[(i - __INT_MAX__) >> 1] -= 2; +} + +__attribute__((noinline, noclone)) void +N(f2) (void) +{ + unsigned long long i; +#pragma omp F S + for (i = __LONG_LONG_MAX__ + 4500ULL - 27; + i > __LONG_LONG_MAX__ - 27ULL; i -= 3) + a[(i + 
26LL - __LONG_LONG_MAX__) / 3] -= 4; +} + +__attribute__((noinline, noclone)) void +N(f3) (long long n1, long long n2, long long s3) +{ +#pragma omp F S + for (long long i = n1 + 23; i > n2 - 25; i -= s3) + a[i + 48] += 7; +} + +__attribute__((noinline, noclone)) void +N(f4) (void) +{ + unsigned int i; +#pragma omp F S + for (i = 30; i < 20; i += 2) + a[i] += 10; +} + +__attribute__((noinline, noclone)) void +N(f5) (int n11, int n12, int n21, int n22, int n31, int n32, + int s1, int s2, int s3) +{ + int v1, v2, v3; +#pragma omp F S collapse(3) + for (v1 = n11; v1 < n12; v1 += s1) + for (v2 = n21; v2 < n22; v2 += s2) + for (v3 = n31; v3 < n32; v3 += s3) + b[v1][v2][v3] += 2.5; +} + +__attribute__((noinline, noclone)) void +N(f6) (int n11, int n12, int n21, int n22, long long n31, long long n32, + int s1, int s2, long long int s3) +{ + int v1, v2; + long long v3; +#pragma omp F S collapse(3) + for (v1 = n11; v1 > n12; v1 += s1) + for (v2 = n21; v2 > n22; v2 += s2) + for (v3 = n31; v3 > n32; v3 += s3) + b[v1][v2 / 2][v3] -= 4.5; +} + +__attribute__((noinline, noclone)) void +N(f7) (void) +{ + unsigned int v1, v3; + unsigned long long v2; +#pragma omp F S collapse(3) + for (v1 = 0; v1 < 20; v1 += 2) + for (v2 = __LONG_LONG_MAX__ + 16ULL; + v2 > __LONG_LONG_MAX__ - 29ULL; v2 -= 3) + for (v3 = 10; v3 > 0; v3--) + b[v1 >> 1][(v2 - __LONG_LONG_MAX__ + 64) / 3 - 12][v3 - 1] += 5.5; +} + +__attribute__((noinline, noclone)) void +N(f8) (void) +{ + long long v1, v2, v3; +#pragma omp F S collapse(3) + for (v1 = 0; v1 < 20; v1 += 2) + for (v2 = 30; v2 < 20; v2++) + for (v3 = 10; v3 < 0; v3--) + b[v1][v2][v3] += 5.5; +} + +__attribute__((noinline, noclone)) void +N(f9) (void) +{ + int i; +#pragma omp F S + for (i = 20; i < 10; i++) + { + a[i] += 2; + noreturn (); + a[i] -= 4; + } +} + +__attribute__((noinline, noclone)) void +N(f10) (void) +{ + int i; +#pragma omp F S collapse(3) + for (i = 0; i < 10; i++) + for (int j = 10; j < 8; j++) + for (long k = -10; k < 10; k++) + { + 
b[i][j][k] += 4; + noreturn (); + b[i][j][k] -= 8; + } +} + +__attribute__((noinline, noclone)) void +N(f11) (int n) +{ + int i; +#pragma omp F S + for (i = 20; i < n; i++) + { + a[i] += 8; + noreturn (); + a[i] -= 16; + } +} + +__attribute__((noinline, noclone)) void +N(f12) (int n) +{ + int i; +#pragma omp F S collapse(3) + for (i = 0; i < 10; i++) + for (int j = n; j < 8; j++) + for (long k = -10; k < 10; k++) + { + b[i][j][k] += 16; + noreturn (); + b[i][j][k] -= 32; + } +} + +__attribute__((noinline, noclone)) void +N(f13) (void) +{ + int *i; +#pragma omp F S + for (i = a; i < &a[1500]; i++) + i[0] += 2; +} + +__attribute__((noinline, noclone)) void +N(f14) (void) +{ + float *i; +#pragma omp F S collapse(3) + for (i = &b[0][0][0]; i < &b[0][0][10]; i++) + for (float *j = &b[0][15][0]; j > &b[0][0][0]; j -= 10) + for (float *k = &b[0][0][10]; k > &b[0][0][0]; --k) + b[i - &b[0][0][0]][(j - &b[0][0][0]) / 10 - 1][(k - &b[0][0][0]) - 1] + -= 3.5; +} + +__attribute__((noinline, noclone)) int +N(test) (void) +{ + int i, j, k; + for (i = 0; i < 1500; i++) + a[i] = i - 25; + N(f0) (); + for (i = 0; i < 1500; i++) + if (a[i] != i - 23) + return 1; + N(f1) (); + for (i = 0; i < 1500; i++) + if (a[i] != i - 25) + return 1; + N(f2) (); + for (i = 0; i < 1500; i++) + if (a[i] != i - 29) + return 1; + N(f3) (1500LL - 1 - 23 - 48, -1LL + 25 - 48, 1LL); + for (i = 0; i < 1500; i++) + if (a[i] != i - 22) + return 1; + N(f3) (1500LL - 1 - 23 - 48, 1500LL - 1, 7LL); + for (i = 0; i < 1500; i++) + if (a[i] != i - 22) + return 1; + N(f4) (); + for (i = 0; i < 1500; i++) + if (a[i] != i - 22) + return 1; + for (i = 0; i < 10; i++) + for (j = 0; j < 15; j++) + for (k = 0; k < 10; k++) + b[i][j][k] = i - 2.5 + 1.5 * j - 1.5 * k; + N(f5) (0, 10, 0, 15, 0, 10, 1, 1, 1); + for (i = 0; i < 10; i++) + for (j = 0; j < 15; j++) + for (k = 0; k < 10; k++) + if (b[i][j][k] != i + 1.5 * j - 1.5 * k) + return 1; + N(f5) (0, 10, 30, 15, 0, 10, 4, 5, 6); + for (i = 0; i < 10; i++) + for (j = 0; 
j < 15; j++) + for (k = 0; k < 10; k++) + if (b[i][j][k] != i + 1.5 * j - 1.5 * k) + return 1; + N(f6) (9, -1, 29, 0, 9, -1, -1, -2, -1); + for (i = 0; i < 10; i++) + for (j = 0; j < 15; j++) + for (k = 0; k < 10; k++) + if (b[i][j][k] != i - 4.5 + 1.5 * j - 1.5 * k) + return 1; + N(f7) (); + for (i = 0; i < 10; i++) + for (j = 0; j < 15; j++) + for (k = 0; k < 10; k++) + if (b[i][j][k] != i + 1.0 + 1.5 * j - 1.5 * k) + return 1; + N(f8) (); + for (i = 0; i < 10; i++) + for (j = 0; j < 15; j++) + for (k = 0; k < 10; k++) + if (b[i][j][k] != i + 1.0 + 1.5 * j - 1.5 * k) + return 1; + N(f9) (); + N(f10) (); + N(f11) (10); + N(f12) (12); + for (i = 0; i < 1500; i++) + if (a[i] != i - 22) + return 1; + for (i = 0; i < 10; i++) + for (j = 0; j < 15; j++) + for (k = 0; k < 10; k++) + if (b[i][j][k] != i + 1.0 + 1.5 * j - 1.5 * k) + return 1; + N(f13) (); + N(f14) (); + for (i = 0; i < 1500; i++) + if (a[i] != i - 20) + return 1; + for (i = 0; i < 10; i++) + for (j = 0; j < 15; j++) + for (k = 0; k < 10; k++) + if (b[i][j][k] != i - 2.5 + 1.5 * j - 1.5 * k) + return 1; + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/for-3.c b/libgomp/testsuite/libgomp.c/for-3.c new file mode 100644 index 0000000..06cbf4f --- /dev/null +++ b/libgomp/testsuite/libgomp.c/for-3.c @@ -0,0 +1,110 @@ +/* { dg-options "-std=gnu99 -fopenmp" } */ + +extern void abort (); + +#define M(x, y, z) O(x, y, z) +#define O(x, y, z) x ## _ ## y ## _ ## z + +#pragma omp declare target + +#define F distribute +#define G d +#define S +#define N(x) M(x, G, normal) +#include "for-2.h" +#undef S +#undef N +#undef F +#undef G + +#define F distribute +#define G d_ds128 +#define S dist_schedule(static, 128) +#define N(x) M(x, G, normal) +#include "for-2.h" +#undef S +#undef N +#undef F +#undef G + +#define F distribute simd +#define G ds +#define S +#define N(x) M(x, G, normal) +#include "for-2.h" +#undef S +#undef N +#undef F +#undef G + +#define F distribute simd +#define G ds_ds128 +#define S 
dist_schedule(static, 128) +#define N(x) M(x, G, normal) +#include "for-2.h" +#undef S +#undef N +#undef F +#undef G + +#define F distribute parallel for +#define G dpf +#include "for-1.h" +#undef F +#undef G + +#define F distribute parallel for dist_schedule(static, 128) +#define G dpf_ds128 +#include "for-1.h" +#undef F +#undef G + +#define F distribute parallel for simd +#define G dpfs +#include "for-1.h" +#undef F +#undef G + +#define F distribute parallel for simd dist_schedule(static, 128) +#define G dpfs_ds128 +#include "for-1.h" +#undef F +#undef G + +#pragma omp end declare target + +int +main () +{ + int err = 0; + #pragma omp target teams reduction(|:err) + { + err |= test_d_normal (); + err |= test_d_ds128_normal (); + err |= test_ds_normal (); + err |= test_ds_ds128_normal (); + err |= test_dpf_static (); + err |= test_dpf_static32 (); + err |= test_dpf_auto (); + err |= test_dpf_guided32 (); + err |= test_dpf_runtime (); + err |= test_dpf_ds128_static (); + err |= test_dpf_ds128_static32 (); + err |= test_dpf_ds128_auto (); + err |= test_dpf_ds128_guided32 (); + err |= test_dpf_ds128_runtime (); + err |= test_dpfs_static (); + err |= test_dpfs_static32 (); + err |= test_dpfs_auto (); + err |= test_dpfs_guided32 (); + err |= test_dpfs_runtime (); + err |= test_dpfs_ds128_static (); + err |= test_dpfs_ds128_static32 (); + err |= test_dpfs_ds128_auto (); + err |= test_dpfs_ds128_guided32 (); + err |= test_dpfs_ds128_runtime (); + } + if (err) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/pr58392.c b/libgomp/testsuite/libgomp.c/pr58392.c new file mode 100644 index 0000000..6ca97ad --- /dev/null +++ b/libgomp/testsuite/libgomp.c/pr58392.c @@ -0,0 +1,58 @@ +/* PR tree-optimization/58392 */ +/* { dg-do run } */ +/* { dg-options "-O2" } */ +/* { dg-additional-options "-msse2" { target sse2_runtime } } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +extern void abort (void); +int d[32 * 32]; + 
+__attribute__((noinline, noclone)) int +foo (int a, int b) +{ + int j, c = 0; + #pragma omp parallel for reduction(+: c) + for (j = 0; j < a; j += 32) + { + int l; + #pragma omp simd reduction(+: c) + for (l = 0; l < b; ++l) + c += d[j + l]; + } + return c; +} + +__attribute__((noinline, noclone)) int +bar (int a) +{ + int j, c = 0; + #pragma omp parallel for simd reduction(+: c) + for (j = 0; j < a; ++j) + c += d[j]; + return c; +} + +__attribute__((noinline)) static int +baz (int a) +{ + int j, c = 0; + #pragma omp simd reduction(+: c) + for (j = 0; j < a; ++j) + c += d[j]; + return c; +} + +int +main () +{ + int i; + for (i = 0; i < 32 * 32; i++) + d[i] = (i & 31); + if (foo (32 * 32, 32) != (31 * 32 / 2) * 32) + abort (); + if (bar (32 * 32) != (31 * 32 / 2) * 32) + abort (); + if (baz (32 * 32) != (31 * 32 / 2) * 32) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/simd-1.c b/libgomp/testsuite/libgomp.c/simd-1.c new file mode 100644 index 0000000..352b3b7 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/simd-1.c @@ -0,0 +1,57 @@ +/* { dg-do run } */ +/* { dg-options "-O2" } */ +/* { dg-additional-options "-msse2" { target sse2_runtime } } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +extern void abort (); +int a[1024] __attribute__((aligned (32))) = { 1 }; +int b[1024] __attribute__((aligned (32))) = { 1 }; +int k, m; +struct U { int u; }; +struct V { int v; }; + +__attribute__((noinline, noclone)) int +foo (int *p) +{ + int i, s = 0; + struct U u; + struct V v; + #pragma omp simd aligned(a, p : 32) linear(k: m + 1) \ + reduction(+:s) lastprivate(u, v) + for (i = 0; i < 1024; i++) + { + a[i] *= p[i]; + u.u = p[i] + k; + k += m + 1; + v.v = p[i] + k; + s += p[i] + k; + } + if (u.u != 36 + 4 + 3 * 1023 || v.v != 36 + 4 + 3 * 1024) + abort (); + return s; +} + +int +main () +{ +#if __SIZEOF_INT__ >= 4 + int i; + k = 4; + m = 2; + for (i = 0; i < 1024; i++) + { + a[i] = i - 512; + b[i] = (i - 51) % 39; + } + int s = foo 
(b); + for (i = 0; i < 1024; i++) + { + if (b[i] != (i - 51) % 39 + || a[i] != (i - 512) * b[i]) + abort (); + } + if (k != 4 + 3 * 1024 || s != 1596127) + abort (); +#endif + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/simd-2.c b/libgomp/testsuite/libgomp.c/simd-2.c new file mode 100644 index 0000000..b485fcb --- /dev/null +++ b/libgomp/testsuite/libgomp.c/simd-2.c @@ -0,0 +1,36 @@ +/* { dg-do run } */ +/* { dg-options "-O2" } */ +/* { dg-additional-options "-msse2" { target sse2_runtime } } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +extern void abort (); +__UINTPTR_TYPE__ arr[1027]; + +__attribute__((noinline, noclone)) void +foo () +{ + int i, v; + #pragma omp simd private (v) safelen(16) + for (i = 0; i < 1027; i++) + arr[i] = (__UINTPTR_TYPE__) &v; +} + +int +main () +{ + int i, j, cnt = 0; + __UINTPTR_TYPE__ arr2[16]; + foo (); + for (i = 0; i < 1027; i++) + { + for (j = 0; j < cnt; j++) + if (arr[i] == arr2[j]) + break; + if (j != cnt) + continue; + if (cnt == 16) + abort (); + arr2[cnt++] = arr[i]; + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/simd-3.c b/libgomp/testsuite/libgomp.c/simd-3.c new file mode 100644 index 0000000..34a3883 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/simd-3.c @@ -0,0 +1,131 @@ +/* { dg-do run } */ +/* { dg-options "-O2" } */ +/* { dg-additional-options "-msse2" { target sse2_runtime } } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +extern void abort (); +int a[1024] __attribute__((aligned (32))) = { 1 }; +int b[1024] __attribute__((aligned (32))) = { 1 }; +unsigned char c[1024] __attribute__((aligned (32))) = { 1 }; +int k, m; +__UINTPTR_TYPE__ u, u2, u3; + +__attribute__((noinline, noclone)) int +foo (int *p) +{ + int i, s = 0, s2 = 0, t, t2; + #pragma omp simd aligned(a, b, p : 32) linear(k: m + 1) reduction(+:s) \ + lastprivate (t2) + for (i = 0; i < 512; i++) + { + a[i] *= p[i]; + t2 = k + p[i]; + k += m + 1; + s += p[i] + k; + c[i]++; + } + #pragma 
omp simd aligned(a, b, p : 32) linear(k: m + 1) reduction(+:s2) \ + lastprivate (t, u, u2, u3) + for (i = 512; i < 1024; i++) + { + a[i] *= p[i]; + k += m + 1; + t = k + p[i]; + u = (__UINTPTR_TYPE__) &k; + u2 = (__UINTPTR_TYPE__) &s2; + u3 = (__UINTPTR_TYPE__) &t; + s2 += t; + c[i]++; + } + return s + s2 + t + t2; +} + +__attribute__((noinline, noclone)) long int +bar (int *p, long int n, long int o) +{ + long int i, s = 0, s2 = 0, t, t2; + #pragma omp simd aligned(a, b, p : 32) linear(k: m + 1) reduction(+:s) \ + lastprivate (t2) + for (i = 0; i < n; i++) + { + a[i] *= p[i]; + t2 = k + p[i]; + k += m + 1; + s += p[i] + k; + c[i]++; + } + #pragma omp simd aligned(a, b, p : 32) linear(k: m + 1) reduction(+:s2) \ + lastprivate (t, u, u2, u3) + for (i = n; i < o; i++) + { + a[i] *= p[i]; + k += m + 1; + t = k + p[i]; + u = (__UINTPTR_TYPE__) &k; + u2 = (__UINTPTR_TYPE__) &s2; + u3 = (__UINTPTR_TYPE__) &t; + s2 += t; + c[i]++; + } + return s + s2 + t + t2; +} + +int +main () +{ +#if __SIZEOF_INT__ >= 4 + int i; + k = 4; + m = 2; + for (i = 0; i < 1024; i++) + { + a[i] = i - 512; + b[i] = (i - 51) % 39; + c[i] = (unsigned char) i; + } + int s = foo (b); + for (i = 0; i < 1024; i++) + { + if (b[i] != (i - 51) % 39 + || a[i] != (i - 512) * b[i] + || c[i] != (unsigned char) (i + 1)) + abort (); + a[i] = i - 512; + } + if (k != 4 + 3 * 1024 + || s != 1596127 + (4 + 3 * 511 + b[511]) + (4 + 3 * 1024 + b[1023])) + abort (); + k = 4; + s = bar (b, 512, 1024); + for (i = 0; i < 1024; i++) + { + if (b[i] != (i - 51) % 39 + || a[i] != (i - 512) * b[i] + || c[i] != (unsigned char) (i + 2)) + abort (); + a[i] = i - 512; + } + if (k != 4 + 3 * 1024 + || s != 1596127 + (4 + 3 * 511 + b[511]) + (4 + 3 * 1024 + b[1023])) + abort (); + k = 4; + s = bar (b, 511, 1021); + for (i = 0; i < 1021; i++) + { + if (b[i] != (i - 51) % 39 + || a[i] != (i - 512) * b[i] + || c[i] != (unsigned char) (i + 3)) + abort (); + a[i] = i - 512; + } + for (i = 1021; i < 1024; i++) + if (b[i] != (i - 51) % 
39 + || a[i] != i - 512 + || c[i] != (unsigned char) (i + 2)) + abort (); + if (k != 4 + 3 * 1021 + || s != 1586803 + (4 + 3 * 510 + b[510]) + (4 + 3 * 1021 + b[1020])) + abort (); +#endif + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/simd-4.c b/libgomp/testsuite/libgomp.c/simd-4.c new file mode 100644 index 0000000..fd87c7e --- /dev/null +++ b/libgomp/testsuite/libgomp.c/simd-4.c @@ -0,0 +1,42 @@ +/* { dg-do run } */ +/* { dg-options "-O2" } */ +/* { dg-additional-options "-msse2" { target sse2_runtime } } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +extern void abort (); +int a[1024] __attribute__((aligned (32))) = { 1 }; +struct S { int s; }; +#pragma omp declare reduction (+:struct S:omp_out.s += omp_in.s) +#pragma omp declare reduction (foo:struct S:omp_out.s += omp_in.s) +#pragma omp declare reduction (foo:int:omp_out += omp_in) + +__attribute__((noinline, noclone)) int +foo (void) +{ + int i, u = 0; + struct S s, t; + s.s = 0; t.s = 0; + #pragma omp simd aligned(a : 32) reduction(+:s) reduction(foo:t, u) + for (i = 0; i < 1024; i++) + { + int x = a[i]; + s.s += x; + t.s += x; + u += x; + } + if (t.s != s.s || u != s.s) + abort (); + return s.s; +} + +int +main () +{ + int i; + for (i = 0; i < 1024; i++) + a[i] = (i & 31) + (i / 128); + int s = foo (); + if (s != 19456) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/simd-5.c b/libgomp/testsuite/libgomp.c/simd-5.c new file mode 100644 index 0000000..0b6d41e --- /dev/null +++ b/libgomp/testsuite/libgomp.c/simd-5.c @@ -0,0 +1,44 @@ +/* { dg-do run } */ +/* { dg-options "-O2" } */ +/* { dg-additional-options "-msse2" { target sse2_runtime } } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +extern void abort (); +int a[1024] __attribute__((aligned (32))) = { 1 }; +struct S { int s; }; +#pragma omp declare reduction (+:struct S:omp_out.s += omp_in.s) +#pragma omp declare reduction (foo:struct S:omp_out.s += omp_in.s) +#pragma omp declare 
reduction (foo:int:omp_out += omp_in) + +__attribute__((noinline, noclone)) int +foo (void) +{ + int i, u = 0, q = 0; + struct S s, t; + s.s = 0; t.s = 0; + #pragma omp simd aligned(a : 32) reduction(+:s, q) reduction(foo:t, u) \ + safelen(1) + for (i = 0; i < 1024; i++) + { + int x = a[i]; + s.s += x; + t.s += x; + u += x; + q++; + } + if (t.s != s.s || u != s.s || q != 1024) + abort (); + return s.s; +} + +int +main () +{ + int i; + for (i = 0; i < 1024; i++) + a[i] = (i & 31) + (i / 128); + int s = foo (); + if (s != 19456) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/simd-6.c b/libgomp/testsuite/libgomp.c/simd-6.c new file mode 100644 index 0000000..896f347 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/simd-6.c @@ -0,0 +1,44 @@ +/* PR libgomp/58482 */ +/* { dg-do run } */ +/* { dg-options "-O2" } */ +/* { dg-additional-options "-msse2" { target sse2_runtime } } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +extern void abort (); +int a[1024] __attribute__((aligned (32))) = { 1 }; +struct S { int s; }; +#pragma omp declare reduction (+:struct S:omp_out.s += omp_in.s) +#pragma omp declare reduction (foo:struct S:omp_out.s += omp_in.s) +#pragma omp declare reduction (foo:int:omp_out += omp_in) + +__attribute__((noinline, noclone)) int +foo (void) +{ + int i, u = 0; + struct S s, t; + s.s = 0; t.s = 0; + #pragma omp parallel for simd aligned(a : 32) reduction(+:s) \ + reduction(foo:t, u) + for (i = 0; i < 1024; i++) + { + int x = a[i]; + s.s += x; + t.s += x; + u += x; + } + if (t.s != s.s || u != s.s) + abort (); + return s.s; +} + +int +main () +{ + int i; + for (i = 0; i < 1024; i++) + a[i] = (i & 31) + (i / 128); + int s = foo (); + if (s != 19456) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/target-1.c b/libgomp/testsuite/libgomp.c/target-1.c new file mode 100644 index 0000000..f734d3c2 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-1.c @@ -0,0 +1,90 @@ +extern +#ifdef __cplusplus 
+"C" +#endif +void abort (void); + +void +fn1 (double *x, double *y, int z) +{ + int i; + for (i = 0; i < z; i++) + { + x[i] = i & 31; + y[i] = (i & 63) - 30; + } +} + +#pragma omp declare target +int tgtv = 6; +int +tgt (void) +{ + #pragma omp atomic update + tgtv++; + return 0; +} +#pragma omp end declare target + +double +fn2 (int x, int y, int z) +{ + double b[1024], c[1024], s = 0; + int i, j; + fn1 (b, c, x); + #pragma omp target data map(to: b) + { + #pragma omp target map(tofrom: c) + #pragma omp teams num_teams(y) thread_limit(z) reduction(+:s) firstprivate(x) + #pragma omp distribute dist_schedule(static, 4) collapse(1) + for (j=0; j < x; j += y) + #pragma omp parallel for reduction(+:s) + for (i = j; i < j + y; i++) + tgt (), s += b[i] * c[i]; + #pragma omp target update from(b, tgtv) + } + return s; +} + +double +fn3 (int x) +{ + double b[1024], c[1024], s = 0; + int i; + fn1 (b, c, x); + #pragma omp target map(to: b, c) + #pragma omp parallel for reduction(+:s) + for (i = 0; i < x; i++) + tgt (), s += b[i] * c[i]; + return s; +} + +double +fn4 (int x, double *p) +{ + double b[1024], c[1024], d[1024], s = 0; + int i; + fn1 (b, c, x); + fn1 (d + x, p + x, x); + #pragma omp target map(to: b, c[0:x], d[x:x]) map(to:p[x:64 + (x & 31)]) + #pragma omp parallel for reduction(+:s) + for (i = 0; i < x; i++) + s += b[i] * c[i] + d[x + i] + p[x + i]; + return s; +} + +int +main () +{ + double a = fn2 (128, 4, 6); + int b = tgtv; + double c = fn3 (61); + #pragma omp target update from(tgtv) + int d = tgtv; + double e[1024]; + double f = fn4 (64, e); + if (a != 13888.0 || b != 6 + 128 || c != 4062.0 || d != 6 + 128 + 61 + || f != 8032.0) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/target-2.c b/libgomp/testsuite/libgomp.c/target-2.c new file mode 100644 index 0000000..ada8dad --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-2.c @@ -0,0 +1,88 @@ +extern +#ifdef __cplusplus +"C" +#endif +void abort (void); + +void +fn1 (double *x, double 
*y, int z) +{ + int i; + for (i = 0; i < z; i++) + { + x[i] = i & 31; + y[i] = (i & 63) - 30; + } +} + +double +fn2 (int x) +{ + double s = 0; + double b[3 * x], c[3 * x], d[3 * x], e[3 * x]; + int i; + fn1 (b, c, x); + fn1 (e, d + x, x); + #pragma omp target map(to: b, c[:x], d[x:x], e) + #pragma omp parallel for reduction(+:s) + for (i = 0; i < x; i++) + s += b[i] * c[i] + d[x + i] + sizeof (b) - sizeof (c); + return s; +} + +double +fn3 (int x) +{ + double s = 0; + double b[3 * x], c[3 * x], d[3 * x], e[3 * x]; + int i; + fn1 (b, c, x); + fn1 (e, d, x); + #pragma omp target + #pragma omp parallel for reduction(+:s) + for (i = 0; i < x; i++) + s += b[i] * c[i] + d[i]; + return s; +} + +double +fn4 (int x) +{ + double s = 0; + double b[3 * x], c[3 * x], d[3 * x], e[3 * x]; + int i; + fn1 (b, c, x); + fn1 (e, d + x, x); + #pragma omp target data map(from: b, c[:x], d[x:x], e) + { + #pragma omp target update to(b, c[:x], d[x:x], e) + #pragma omp target map(c[:x], d[x:x]) + #pragma omp parallel for reduction(+:s) + for (i = 0; i < x; i++) + { + s += b[i] * c[i] + d[x + i] + sizeof (b) - sizeof (c); + b[i] = i + 0.5; + c[i] = 0.5 - i; + d[x + i] = 0.5 * i; + } + } + for (i = 0; i < x; i++) + if (b[i] != i + 0.5 || c[i] != 0.5 - i || d[x + i] != 0.5 * i) + abort (); + return s; +} + +int +main () +{ + double a = fn2 (128); + if (a != 14080.0) + abort (); + double b = fn3 (128); + if (a != b) + abort (); + double c = fn4 (256); + if (c != 28160.0) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/target-3.c b/libgomp/testsuite/libgomp.c/target-3.c new file mode 100644 index 0000000..7002cf2 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-3.c @@ -0,0 +1,17 @@ +#include <omp.h> +#include <stdlib.h> + +int +main () +{ + if (omp_get_level ()) + abort (); + #pragma omp target if (0) + if (omp_get_level ()) + abort (); + #pragma omp target if (0) + #pragma omp teams + if (omp_get_level ()) + abort (); + return 0; +} diff --git 
a/libgomp/testsuite/libgomp.c/target-4.c b/libgomp/testsuite/libgomp.c/target-4.c new file mode 100644 index 0000000..26e935b --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-4.c @@ -0,0 +1,14 @@ +#include <omp.h> +#include <stdlib.h> + +int +main () +{ + omp_set_dynamic (0); + #pragma omp parallel num_threads (4) + #pragma omp target if (0) + #pragma omp single + if (omp_get_num_threads () != 1) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/target-5.c b/libgomp/testsuite/libgomp.c/target-5.c new file mode 100644 index 0000000..4367443 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-5.c @@ -0,0 +1,83 @@ +#include <omp.h> +#include <stdlib.h> + +int +main () +{ + int d_o = omp_get_dynamic (); + int n_o = omp_get_nested (); + omp_sched_t s_o; + int c_o; + omp_get_schedule (&s_o, &c_o); + int m_o = omp_get_max_threads (); + omp_set_dynamic (1); + omp_set_nested (1); + omp_set_schedule (omp_sched_static, 2); + omp_set_num_threads (4); + int d = omp_get_dynamic (); + int n = omp_get_nested (); + omp_sched_t s; + int c; + omp_get_schedule (&s, &c); + int m = omp_get_max_threads (); + if (!omp_is_initial_device ()) + abort (); + #pragma omp target if (0) + { + omp_sched_t s_c; + int c_c; + omp_get_schedule (&s_c, &c_c); + if (d_o != omp_get_dynamic () + || n_o != omp_get_nested () + || s_o != s_c + || c_o != c_c + || m_o != omp_get_max_threads ()) + abort (); + omp_set_dynamic (0); + omp_set_nested (0); + omp_set_schedule (omp_sched_dynamic, 4); + omp_set_num_threads (2); + if (!omp_is_initial_device ()) + abort (); + } + if (!omp_is_initial_device ()) + abort (); + omp_sched_t s_c; + int c_c; + omp_get_schedule (&s_c, &c_c); + if (d != omp_get_dynamic () + || n != omp_get_nested () + || s != s_c + || c != c_c + || m != omp_get_max_threads ()) + abort (); + #pragma omp target if (0) + #pragma omp teams + { + omp_sched_t s_c; + int c_c; + omp_get_schedule (&s_c, &c_c); + if (d_o != omp_get_dynamic () + || n_o != omp_get_nested () + || s_o 
!= s_c + || c_o != c_c + || m_o != omp_get_max_threads ()) + abort (); + omp_set_dynamic (0); + omp_set_nested (0); + omp_set_schedule (omp_sched_dynamic, 4); + omp_set_num_threads (2); + if (!omp_is_initial_device ()) + abort (); + } + if (!omp_is_initial_device ()) + abort (); + omp_get_schedule (&s_c, &c_c); + if (d != omp_get_dynamic () + || n != omp_get_nested () + || s != s_c + || c != c_c + || m != omp_get_max_threads ()) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/target-6.c b/libgomp/testsuite/libgomp.c/target-6.c new file mode 100644 index 0000000..ea35aa4 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-6.c @@ -0,0 +1,68 @@ +#include <omp.h> +#include <stdlib.h> + +int +main () +{ + omp_set_dynamic (0); + omp_set_nested (1); + if (omp_in_parallel ()) + abort (); + #pragma omp parallel num_threads (3) + if (omp_get_thread_num () == 2) + { + if (!omp_in_parallel ()) + abort (); + #pragma omp parallel num_threads (3) + if (omp_get_thread_num () == 1) + { + if (!omp_in_parallel () + || omp_get_level () != 2 + || omp_get_ancestor_thread_num (0) != 0 + || omp_get_ancestor_thread_num (1) != 2 + || omp_get_ancestor_thread_num (2) != 1 + || omp_get_ancestor_thread_num (3) != -1) + abort (); + #pragma omp target if (0) + { + if (omp_in_parallel () + || omp_get_level () != 0 + || omp_get_ancestor_thread_num (0) != 0 + || omp_get_ancestor_thread_num (1) != -1) + abort (); + #pragma omp parallel num_threads (2) + { + if (!omp_in_parallel () + || omp_get_level () != 1 + || omp_get_ancestor_thread_num (0) != 0 + || omp_get_ancestor_thread_num (1) + != omp_get_thread_num () + || omp_get_ancestor_thread_num (2) != -1) + abort (); + } + } + #pragma omp target if (0) + { + #pragma omp teams thread_limit (2) + { + if (omp_in_parallel () + || omp_get_level () != 0 + || omp_get_ancestor_thread_num (0) != 0 + || omp_get_ancestor_thread_num (1) != -1) + abort (); + #pragma omp parallel num_threads (2) + { + if (!omp_in_parallel () + || 
omp_get_level () != 1 + || omp_get_ancestor_thread_num (0) != 0 + || omp_get_ancestor_thread_num (1) + != omp_get_thread_num () + || omp_get_ancestor_thread_num (2) != -1) + abort (); + } + } + } + } + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/target-7.c b/libgomp/testsuite/libgomp.c/target-7.c new file mode 100644 index 0000000..90de6c5 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-7.c @@ -0,0 +1,111 @@ +#include <omp.h> +#include <stdlib.h> + +volatile int v; + +void +foo (int f) +{ + int d = f ? omp_get_num_devices () : omp_get_default_device (); + int h = 5; + #pragma omp target device (d) + if (omp_get_level () != 0) + abort (); + #pragma omp target if (v > 1) + if (omp_get_level () != 0 || !omp_is_initial_device ()) + abort (); + #pragma omp target device (d) if (v > 1) + if (omp_get_level () != 0 || !omp_is_initial_device ()) + abort (); + #pragma omp target if (v <= 1) + if (omp_get_level () != 0 || (f && !omp_is_initial_device ())) + abort (); + #pragma omp target device (d) if (v <= 1) + if (omp_get_level () != 0 || (f && !omp_is_initial_device ())) + abort (); + #pragma omp target if (0) + if (omp_get_level () != 0 || !omp_is_initial_device ()) + abort (); + #pragma omp target device (d) if (0) + if (omp_get_level () != 0 || !omp_is_initial_device ()) + abort (); + #pragma omp target if (1) + if (omp_get_level () != 0 || (f && !omp_is_initial_device ())) + abort (); + #pragma omp target device (d) if (1) + if (omp_get_level () != 0 || (f && !omp_is_initial_device ())) + abort (); + #pragma omp target data device (d) map (to: h) + { + #pragma omp target device (d) + if (omp_get_level () != 0 || (f && !omp_is_initial_device ()) || h++ != 5) + abort (); + #pragma omp target update device (d) from (h) + } + #pragma omp target data if (v > 1) map (to: h) + { + #pragma omp target if (v > 1) + if (omp_get_level () != 0 || !omp_is_initial_device () || h++ != 6) + abort (); + #pragma omp target update if (v > 1) from (h) + } + #pragma omp 
target data device (d) if (v > 1) map (to: h) + { + #pragma omp target device (d) if (v > 1) + if (omp_get_level () != 0 || !omp_is_initial_device () || h++ != 7) + abort (); + #pragma omp target update device (d) if (v > 1) from (h) + } + #pragma omp target data if (v <= 1) map (to: h) + { + #pragma omp target if (v <= 1) + if (omp_get_level () != 0 || (f && !omp_is_initial_device ()) || h++ != 8) + abort (); + #pragma omp target update if (v <= 1) from (h) + } + #pragma omp target data device (d) if (v <= 1) map (to: h) + { + #pragma omp target device (d) if (v <= 1) + if (omp_get_level () != 0 || (f && !omp_is_initial_device ()) || h++ != 9) + abort (); + #pragma omp target update device (d) if (v <= 1) from (h) + } + #pragma omp target data if (0) map (to: h) + { + #pragma omp target if (0) + if (omp_get_level () != 0 || !omp_is_initial_device () || h++ != 10) + abort (); + #pragma omp target update if (0) from (h) + } + #pragma omp target data device (d) if (0) map (to: h) + { + #pragma omp target device (d) if (0) + if (omp_get_level () != 0 || !omp_is_initial_device () || h++ != 11) + abort (); + #pragma omp target update device (d) if (0) from (h) + } + #pragma omp target data if (1) map (to: h) + { + #pragma omp target if (1) + if (omp_get_level () != 0 || (f && !omp_is_initial_device ()) || h++ != 12) + abort (); + #pragma omp target update if (1) from (h) + } + #pragma omp target data device (d) if (1) map (to: h) + { + #pragma omp target device (d) if (1) + if (omp_get_level () != 0 || (f && !omp_is_initial_device ()) || h++ != 13) + abort (); + #pragma omp target update device (d) if (1) from (h) + } + if (h != 14) + abort (); +} + +int +main () +{ + foo (0); + foo (1); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/taskgroup-1.c b/libgomp/testsuite/libgomp.c/taskgroup-1.c new file mode 100644 index 0000000..641a3bc --- /dev/null +++ b/libgomp/testsuite/libgomp.c/taskgroup-1.c @@ -0,0 +1,83 @@ +extern +#ifdef __cplusplus +"C" +#endif +void 
abort (void); +int v[16] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; + +int +main () +{ + #pragma omp parallel num_threads (4) + #pragma omp single + { + int i; + #pragma omp taskgroup + { + for (i = 0; i < 16; i += 2) + #pragma omp task + { + #pragma omp task + v[i]++; + #pragma omp task + v[i + 1]++; + } + } + for (i = 0; i < 16; i++) + if (v[i] != i + 2) + abort (); + #pragma omp taskgroup + { + for (i = 0; i < 16; i += 2) + #pragma omp task + { + #pragma omp task + v[i]++; + #pragma omp task + v[i + 1]++; + #pragma omp taskwait + } + } + for (i = 0; i < 16; i++) + if (v[i] != i + 3) + abort (); + #pragma omp taskgroup + { + for (i = 0; i < 16; i += 2) + #pragma omp task + { + #pragma omp task + v[i]++; + v[i + 1]++; + } + #pragma omp taskwait + for (i = 0; i < 16; i += 2) + #pragma omp task + v[i + 1]++; + } + for (i = 0; i < 16; i++) + if (v[i] != i + 4 + (i & 1)) + abort (); + #pragma omp taskgroup + { + for (i = 0; i < 16; i += 2) + { + #pragma omp taskgroup + { + #pragma omp task + v[i]++; + #pragma omp task + v[i + 1]++; + } + if (v[i] != i + 5 || v[i + 1] != i + 7) + abort (); + #pragma omp task + v[i]++; + } + } + for (i = 0; i < 16; i++) + if (v[i] != i + 6) + abort (); + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/thread-limit-1.c b/libgomp/testsuite/libgomp.c/thread-limit-1.c new file mode 100644 index 0000000..6cc716b --- /dev/null +++ b/libgomp/testsuite/libgomp.c/thread-limit-1.c @@ -0,0 +1,41 @@ +/* { dg-do run } */ +/* { dg-set-target-env-var OMP_THREAD_LIMIT "6" } */ + +#include <stdlib.h> +#include <unistd.h> + +int +main () +{ + if (omp_get_thread_limit () != 6) + return 0; + omp_set_dynamic (0); + omp_set_nested (1); + #pragma omp parallel num_threads (3) + if (omp_get_num_threads () != 3) + abort (); + #pragma omp parallel num_threads (3) + if (omp_get_num_threads () != 3) + abort (); + #pragma omp parallel num_threads (8) + if (omp_get_num_threads () > 6) + abort (); + #pragma omp parallel num_threads (6) + if 
(omp_get_num_threads () != 6) + abort (); + int cnt = 0; + #pragma omp parallel num_threads (5) + #pragma omp parallel num_threads (5) + #pragma omp parallel num_threads (2) + { + int v; + #pragma omp atomic capture + v = ++cnt; + if (v > 6) + abort (); + usleep (10000); + #pragma omp atomic + --cnt; + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/thread-limit-2.c b/libgomp/testsuite/libgomp.c/thread-limit-2.c new file mode 100644 index 0000000..0fc9dae --- /dev/null +++ b/libgomp/testsuite/libgomp.c/thread-limit-2.c @@ -0,0 +1,57 @@ +/* { dg-do run } */ +/* { dg-set-target-env-var OMP_THREAD_LIMIT "9" } */ + +#include <stdlib.h> +#include <unistd.h> + +int +main () +{ + if (omp_get_thread_limit () != 9) + return 0; + omp_set_dynamic (0); + #pragma omp parallel num_threads (8) + if (omp_get_num_threads () != 8) + abort (); + #pragma omp parallel num_threads (16) + if (omp_get_num_threads () > 9) + abort (); + #pragma omp target if (0) + #pragma omp teams thread_limit (6) + { + if (omp_get_thread_limit () > 6) + abort (); + if (omp_get_thread_limit () == 6) + { + omp_set_dynamic (0); + omp_set_nested (1); + #pragma omp parallel num_threads (3) + if (omp_get_num_threads () != 3) + abort (); + #pragma omp parallel num_threads (3) + if (omp_get_num_threads () != 3) + abort (); + #pragma omp parallel num_threads (8) + if (omp_get_num_threads () > 6) + abort (); + #pragma omp parallel num_threads (6) + if (omp_get_num_threads () != 6) + abort (); + int cnt = 0; + #pragma omp parallel num_threads (5) + #pragma omp parallel num_threads (5) + #pragma omp parallel num_threads (2) + { + int v; + #pragma omp atomic capture + v = ++cnt; + if (v > 6) + abort (); + usleep (10000); + #pragma omp atomic + --cnt; + } + } + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/thread-limit-3.c b/libgomp/testsuite/libgomp.c/thread-limit-3.c new file mode 100644 index 0000000..af9bd78 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/thread-limit-3.c @@ -0,0 +1,12 @@ 
+#include <stdlib.h> +#include <omp.h> + +int +main () +{ + #pragma omp target if (0) + #pragma omp teams thread_limit (1) + if (omp_get_thread_limit () != 1) + abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/udr-1.c b/libgomp/testsuite/libgomp.c/udr-1.c new file mode 100644 index 0000000..ea9da72 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/udr-1.c @@ -0,0 +1,81 @@ +/* { dg-do run } */ + +extern +#ifdef __cplusplus +"C" +#endif +void abort (); + +struct S { int s; struct S *t; }; + +void +foo (struct S *out, struct S *in) +{ + out->s += in->s; +} + +void +bar (struct S *x) +{ + if (x->s != 6) abort (); + x->s = 15; +} + +void +baz (struct S *x, struct S *y) +{ + x->s = 6; + x->t = x; + (void) y; +} + +#pragma omp declare reduction (foo: struct S: foo (&omp_out, &omp_in)) \ + initializer (omp_priv = { 8, &omp_priv }) +#pragma omp declare reduction (foo: char, int, short: omp_out += omp_in - 4) \ + initializer (omp_priv = 4) +#pragma omp declare reduction (+: struct S: foo (&omp_out, &omp_in)) \ + initializer (baz (&omp_priv, &omp_orig)) + +void +test (struct S s, struct S t) +{ + int q = 0; + #pragma omp parallel num_threads (4) reduction (+: s, q) reduction (foo: t) + { + if (s.s != 6 || s.t != &s || t.s != 8 || t.t != &t) + abort (); + s.s = 2; + t.s = 3; + q = 1; + } + if (s.s != 12 + 2 * q || t.s != 14 + 3 * q) + abort (); +} + +int +main () +{ + struct S s, t; + s.s = 9; t.s = 10; + int h = 30, v = 2, q = 0; + #pragma omp declare reduction (foo: struct S: omp_out.s *= omp_in.s) \ + initializer (omp_priv = omp_orig) + { + #pragma omp declare reduction (foo: struct S: omp_out.s += omp_in.s) \ + initializer (omp_priv = omp_orig) + #pragma omp parallel num_threads (4) reduction (+: t, q) \ + reduction (min: h) reduction (foo: s, v) + { + if (s.s != 9 || t.s != 6 || v != 4 || h != __INT_MAX__) abort (); + asm volatile ("" : "+m" (s.s), "+m" (t.s)); + asm volatile ("" : "+r" (h), "+r" (v)); + h = t.s; s.s++; t.s++; v++; q++; + } + } + if (h != 6 
|| s.s != 9 + q * 10 || t.s != 10 + q * 7 || v != 2 + q) + abort (); + s.s = 12; + t.s = 14; + test (s, t); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/udr-2.c b/libgomp/testsuite/libgomp.c/udr-2.c new file mode 100644 index 0000000..b58b5c7 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/udr-2.c @@ -0,0 +1,27 @@ +/* { dg-do run } */ + +extern void abort (); + +struct S { int s; }; + +#pragma omp declare reduction (+:struct S:omp_out.s += omp_in.s) +#pragma omp declare reduction (foo:struct S:omp_out.s += omp_in.s) +#pragma omp declare reduction (foo:int:omp_out += omp_in) + +int +main () +{ + int u = 0, q = 0; + struct S s, t; + s.s = 0; t.s = 0; + #pragma omp parallel reduction(+:s, q) reduction(foo:t, u) + { + if (s.s != 0 || t.s != 0 || u != 0 || q != 0) abort (); + s.s = 6; + t.s = 8; + u = 9; + q++; + } + if (s.s != 6 * q || t.s != 8 * q || u != 9 * q) abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/udr-3.c b/libgomp/testsuite/libgomp.c/udr-3.c new file mode 100644 index 0000000..e0a5b87 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/udr-3.c @@ -0,0 +1,32 @@ +/* { dg-do run } */ + +extern void abort (); + +struct S; +void foo (struct S *, struct S *); +#pragma omp declare reduction (+:struct S:foo (&omp_out, &omp_in)) +struct S { int s; }; + +void +foo (struct S *x, struct S *y) +{ + x->s += y->s; +} + +int +main () +{ + struct S s; + int i = 0; + s.s = 0; + #pragma omp parallel reduction (+:s, i) + { + if (s.s != 0) + abort (); + s.s = 2; + i = 1; + } + if (s.s != 2 * i) + abort (); + return 0; +} diff --git a/libgomp/work.c b/libgomp/work.c index 5912427..9c5a327 100644 --- a/libgomp/work.c +++ b/libgomp/work.c @@ -221,7 +221,10 @@ gomp_work_share_end (void) if (gomp_barrier_last_thread (bstate)) { if (__builtin_expect (thr->ts.last_work_share != NULL, 1)) - free_work_share (team, thr->ts.last_work_share); + { + team->work_shares_to_free = thr->ts.work_share; + free_work_share (team, thr->ts.last_work_share); + } } 
gomp_team_barrier_wait_end (&team->barrier, bstate); @@ -229,6 +232,32 @@ gomp_work_share_end (void) } /* The current thread is done with its current work sharing construct. + This version implies a cancellable barrier at the end of the work-share. */ + +bool +gomp_work_share_end_cancel (void) +{ + struct gomp_thread *thr = gomp_thread (); + struct gomp_team *team = thr->ts.team; + gomp_barrier_state_t bstate; + + /* Cancellable work sharing constructs cannot be orphaned. */ + bstate = gomp_barrier_wait_cancel_start (&team->barrier); + + if (gomp_barrier_last_thread (bstate)) + { + if (__builtin_expect (thr->ts.last_work_share != NULL, 1)) + { + team->work_shares_to_free = thr->ts.work_share; + free_work_share (team, thr->ts.last_work_share); + } + } + thr->ts.last_work_share = NULL; + + return gomp_team_barrier_wait_cancel_end (&team->barrier, bstate); +} + +/* The current thread is done with its current work sharing construct. This version does NOT imply a barrier at the end of the work-share. */ void @@ -259,6 +288,9 @@ gomp_work_share_end_nowait (void) #endif if (completed == team->nthreads) - free_work_share (team, thr->ts.last_work_share); + { + team->work_shares_to_free = thr->ts.work_share; + free_work_share (team, thr->ts.last_work_share); + } thr->ts.last_work_share = NULL; } |